author     Ed Schouten <ed@FreeBSD.org>    2009-06-02 17:52:33 +0000
committer  Ed Schouten <ed@FreeBSD.org>    2009-06-02 17:52:33 +0000
commit     009b1c42aa6266385f2c37e227516b24077e6dd7 (patch)
tree       64ba909838c23261cace781ece27d106134ea451 /lib/CodeGen/SelectionDAG
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 39316 insertions, 0 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt new file mode 100644 index 000000000000..9ea59ea80c61 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -0,0 +1,22 @@ +add_llvm_library(LLVMSelectionDAG + CallingConvLower.cpp + DAGCombiner.cpp + FastISel.cpp + LegalizeDAG.cpp + LegalizeFloatTypes.cpp + LegalizeIntegerTypes.cpp + LegalizeTypes.cpp + LegalizeTypesGeneric.cpp + LegalizeVectorOps.cpp + LegalizeVectorTypes.cpp + ScheduleDAGSDNodes.cpp + ScheduleDAGSDNodesEmit.cpp + ScheduleDAGFast.cpp + ScheduleDAGList.cpp + ScheduleDAGRRList.cpp + SelectionDAGBuild.cpp + SelectionDAG.cpp + SelectionDAGISel.cpp + SelectionDAGPrinter.cpp + TargetLowering.cpp + ) diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp new file mode 100644 index 000000000000..7cd2b73e8704 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -0,0 +1,148 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. 
+void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { + unsigned NumArgs = TheArgs->getNumValues()-1; + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheArgs->getValueType(i); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags(); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Formal argument #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { + MVT VT = TheRet->getOperand(i*2+1).getValueType(); + ISD::ArgFlagsTy ArgFlags = + cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags(); + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ + cerr << "Return operand #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { + unsigned NumOps = TheCall->getNumArgs(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { + for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { + MVT VT = TheCall->getRetValType(i); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (TheCall->isInreg()) + Flags.setInReg(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { + cerr << "Call result #" << i << " has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. 
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { + cerr << "Call result has unhandled type " + << VT.getMVTString() << "\n"; + abort(); + } +} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 000000000000..4c1710dd81fa --- /dev/null +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,6203 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dagcombine" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <set> +using namespace llvm; + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); + +namespace { + static cl::opt<bool> + CombinerAA("combiner-alias-analysis", cl::Hidden, + cl::desc("Turn on alias analysis during testing")); + + static cl::opt<bool> + CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Include global information in alias analysis")); + +//------------------------------ DAGCombiner ---------------------------------// + + class VISIBILITY_HIDDEN DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations; + bool LegalTypes; + + // Worklist of all of the nodes that need to be simplified. + std::vector<SDNode*> WorkList; + + // AA - Used for DAG load/store alias analysis. + AliasAnalysis &AA; + + /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// the instruction to the work lists because they might get more simplified + /// now. + /// + void AddUsersToWorkList(SDNode *N) { + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + AddToWorkList(*UI); + } + + /// visit - call the node-specific routine that knows how to fold each + /// particular type of node. 
+ SDValue visit(SDNode *N); + + public: + /// AddToWorkList - Add to the work list making sure it's instance is at the + /// the back (next to be processed.) + void AddToWorkList(SDNode *N) { + removeFromWorkList(N); + WorkList.push_back(N); + } + + /// removeFromWorkList - remove all instances of N from the worklist. + /// + void removeFromWorkList(SDNode *N) { + WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), + WorkList.end()); + } + + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + + /// SimplifyDemandedBits - Check the specified integer node value to see if + /// it can be simplified or if things it uses can be simplified by bit + /// propagation. If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. 
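  // (Gloss on this contract, using a hypothetical visitFOO as the example:
  //  returning SDValue() leaves N untouched; calling CombineTo and then
  //  returning SDValue(N, 0) signals that N's values were already replaced
  //  in place; returning any other SDValue asks the caller to replace all
  //  uses of N with that value.)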
+ // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. 
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + MVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + +public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), + TLI(D.getTargetLoweringInfo()), + Level(Unrestricted), + OptLevel(OL), + LegalOperations(false), + LegalTypes(false), + AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class VISIBILITY_HIDDEN WorkListRemover : + public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + + // fneg is removable even if it has multiple uses. 
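  // (Reading of the return codes used below: 0 means the negated form cannot
  //  be computed for free, 1 means it costs the same as the original, and 2
  //  means it is strictly cheaper; an existing fneg is the cheaper case,
  //  since its negation is simply its operand and one node disappears.)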
+ if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!UnsafeFPMath) return 0; + + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. + assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + switch (Op.getOpcode()) { + default: assert(0 && "Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. + assert(UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0)); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. 
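    // (Concrete case: with A == B == +0.0, A-B evaluates to +0.0 and -(A-B)
    //  to -0.0, while B-A is +0.0; the rewrite flips the sign of zero, which
    //  is only acceptable under UnsafeFPMath.)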
+ assert(UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->getValueAPF().isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0)); + + case ISD::FMUL: + case ISD::FDIV: + assert(!HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1)); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + + +// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// that selects between the values 1 and 0, making it equivalent to a setcc. +// Also, set the incoming LHS, RHS, and CC references to the appropriate +// nodes based on the type of node we are checking. This simplifies life a +// bit for the callers. +static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + if (N.getOpcode() == ISD::SELECT_CC && + N.getOperand(2).getOpcode() == ISD::Constant && + N.getOperand(3).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && + cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; + } + return false; +} + +// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only +// one use. If this is true, it allows the users to invert the operation for +// free when it is profitable to do so. +static bool isOneUseSetCC(SDValue N) { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, + SDValue N0, SDValue N1) { + MVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { + if (isa<ConstantSDNode>(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N0.getOperand(1)), + cast<ConstantSDNode>(N1)); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } else if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + + if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { + if (isa<ConstantSDNode>(N0)) { + // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, VT, + cast<ConstantSDNode>(N1.getOperand(1)), + cast<ConstantSDNode>(N0)); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } else if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use + SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + N1.getOperand(0), N0); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); + DOUT << " and " << NumTo-1 << " other values\n"; + DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + assert(N->getValueType(i) == To[i].getValueType() && + "Cannot combine value to value of different type!")); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To, &DeadNodes); + + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + return SDValue(N, 0); +} + +void +DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & + TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorkList(TLO.New.getNode()); + AddUsersToWorkList(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. 
+ ++NodesCombined; + DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); + DOUT << '\n'; + + CommitTargetLoweringOpt(TLO); + return true; +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= NoIllegalOperations; + LegalTypes = Level >= NoIllegalTypes; + + // Add all the dag nodes to the worklist. + WorkList.reserve(DAG.allnodes_size()); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + DAG.DeleteNode(N); + continue; + } + + SDValue RV = combine(N); + + if (RV.getNode() == 0) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. + if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.getNode()); + AddUsersToWorkList(RV.getNode()); + + // Add any uses of the old node to the worklist in case this node is the + // last one that uses them. They may become dead after this node is + // deleted. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. 
+ removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch(N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.getNode() == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. 
+ TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level == Unrestricted, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (RV.getNode() == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + SDValue Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// getInputChainForNode - Given a node, return its input chain if it has one, +/// otherwise return a null sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. + if (N->getNumOperands() == 2) { + if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) + return N->getOperand(0); + if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) + return N->getOperand(1); + } + + SmallVector<SDNode *, 8> TFs; // List of token factors to visit. + SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. + SmallPtrSet<SDNode*, 16> SeenOps; + bool Changed = false; // If we should replace this token factor. + + // Start out with this token factor. + TFs.push_back(N); + + // Iterate through token factors. The TFs grows when new token factors are + // encountered. + for (unsigned i = 0; i < TFs.size(); ++i) { + SDNode *TF = TFs[i]; + + // Check each of the operands. + for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { + SDValue Op = TF->getOperand(i); + + switch (Op.getOpcode()) { + case ISD::EntryToken: + // Entry tokens don't need to be added to the list. They are + // rededundant. + Changed = true; + break; + + case ISD::TokenFactor: + if ((CombinerAA || Op.hasOneUse()) && + std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + // Queue up for processing. + TFs.push_back(Op.getNode()); + // Clean up in case the token factor is removed. + AddToWorkList(Op.getNode()); + Changed = true; + break; + } + // Fall thru + + default: + // Only add if it isn't already in the list. + if (SeenOps.insert(Op.getNode())) + Ops.push_back(Op); + else + Changed = true; + break; + } + } + } + + SDValue Result; + + // If we've change things around then replace token factor. + if (Changed) { + if (Ops.empty()) { + // The entry token is the only possible outcome. + Result = DAG.getEntryNode(); + } else { + // New and improved token factor. + Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, &Ops[0], Ops.size()); + } + + // Don't add users to work list. + return CombineTo(N, Result, false); + } + + return Result; +} + +/// MERGE_VALUES can always be eliminated. 
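/// (Each result i of a MERGE_VALUES node is by definition just operand i, so
/// every use can be redirected to the corresponding operand and the node
/// itself becomes dead.)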
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { + WorkListRemover DeadNodes(*this); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static +SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + MVT VT = N0.getValueType(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); + + if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && + isa<ConstantSDNode>(N00.getOperand(1))) { + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, + N00.getOperand(0), N01), + DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, + N00.getOperand(1), N01)); + return DAG.getNode(ISD::ADD, DL, VT, N0, N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); + // fold (add x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && + GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() + + (uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), VT), + N0.getOperand(1)); + // reassociate add + SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + if (RADD.getNode() != 0) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && + cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && + cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + 
N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(0), N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), + DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + } + + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + } + } + + // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) + if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { + SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (N->hasNUsesOfValue(0, 1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // canonicalize constant to RHS. + if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Flag)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + if (N0 == N1) + return DAG.getConstant(0, N->getValueType(0)); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. 
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + VT); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (mul c1, c2) -> c1*c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mul x, -1) -> 0-x + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // fold (mul x, (1 << c)) -> x << c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { + unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getConstant(Log2Val, getShiftAmountTy()))); + } + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1C && N0.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(1))) { + SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1, N0.getOperand(1)); + AddToWorkList(C3.getNode()); + return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(0,0), Y(0,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + N0.getOperand(1), N1)); + + // reassociate mul + SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + if (RMUL.getNode() != 0) + return RMUL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sdiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->getSExtValue() == 1LL) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), N0); + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + N0, N1); + } + // fold (sdiv X, pow2) -> simple ops after legalize + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + (isPowerOf2_64(N1C->getSExtValue()) || + isPowerOf2_64(-N1C->getSExtValue()))) { + // If dividing by powers of two is cheap, then don't perform the following + // fold. + if (TLI.isPow2DivCheap()) + return SDValue(); + + int64_t pow2 = N1C->getSExtValue(); + int64_t abs2 = pow2 > 0 ? pow2 : -pow2; + unsigned lg2 = Log2_64(abs2); + + // Splat the sign bit into the register + SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getConstant(VT.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(SGN.getNode()); + + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + DAG.getConstant(VT.getSizeInBits() - lg2, + getShiftAmountTy())); + SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + AddToWorkList(SRL.getNode()); + AddToWorkList(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + DAG.getConstant(lg2, getShiftAmountTy())); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (pow2 > 0) + return SRA; + + AddToWorkList(SRA.getNode()); + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + DAG.getConstant(0, VT), SRA); + } + + // if integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. 
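  // (Worked instance of the power-of-two path above, for a 32-bit sdiv X, 8:
  //     SGN = sra X, 31     ; 0 for X >= 0, -1 for X < 0
  //     SRL = srl SGN, 29   ; 0 or 7, i.e. abs2 - 1
  //     ADD = add X, SRL    ; bias negative dividends so that the final
  //     RES = sra ADD, 3    ;   arithmetic shift rounds toward zero
  //  and for a divisor of -8 the result is then negated as 0 - RES.)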
+ if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && + !TLI.isIntDivCheap()) { + SDValue Op = BuildSDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (udiv c1, c2) -> c1/c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), + getShiftAmountTy())); + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + MVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + ADDVT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + } + } + } + // fold (udiv x, c) -> alternate + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + SDValue Op = BuildUDIV(N); + if (Op.getNode()) return Op; + } + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitSREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (srem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (urem c1, c2) -> c1%c2 + if (N0C && N1C && !N1C->isNullValue()) + return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + DAG.getConstant(N1C->getAPIntValue()-1,VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + VT)); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + } + } + } + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. 
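The remainder folds rest on two identities: an unsigned remainder by a power of two is just a mask, and X % C can always be rewritten as X - (X / C) * C once the division itself has been simplified. A quick check, assuming 32-bit unsigned values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xdeadbeefu;
  // (urem x, pow2) -> (and x, pow2-1)
  assert(x % 16u == (x & 15u));
  // X % C == X - (X / C) * C, the identity behind the rem-via-div rewrite.
  for (uint32_t c : {3u, 7u, 25u, 1000u})
    assert(x % c == x - (x / c) * c);
  return 0;
}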
+ if (N1C && !N1C->isNullValue()) { + SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + AddToWorkList(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + AddToWorkList(Mul.getNode()); + return Sub; + } + } + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhs x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + getShiftAmountTy())); + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (mulhu x, 0) -> 0 + if (N1C && N1C->isNullValue()) + return N1; + // fold (mulhu x, 1) -> 0 + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getConstant(0, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + + return SDValue(); +} + +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Return true if a simplification was made. +/// +SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them. 
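MULHS and MULHU yield only the high half of a widening multiply, which is why multiplying by one folds to the sign splat (signed) or to zero (unsigned) above. A scalar model, assuming 32-bit operands widened to 64 bits and an arithmetic right shift on signed values (the helpers are illustrative):

#include <cassert>
#include <cstdint>

int32_t  mulhs(int32_t a, int32_t b)   { return int32_t((int64_t(a) * b) >> 32); }
uint32_t mulhu(uint32_t a, uint32_t b) { return uint32_t((uint64_t(a) * b) >> 32); }

int main() {
  for (int32_t x : {0, 1, -1, 123456, -123456}) {
    assert(mulhs(x, 1) == (x >> 31));      // fold (mulhs x, 1) -> (sra x, 31)
    assert(mulhu(uint32_t(x), 1u) == 0u);  // fold (mulhu x, 1) -> 0
  }
  return 0;
}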
+ if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + N->op_begin(), N->getNumOperands()); + AddToWorkList(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitSDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIVREM(SDNode *N) { + SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); + if (Res.getNode()) return Res; + + return SDValue(); +} + +/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with +/// two operands of the same opcode, try to simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + MVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + N0.getOpcode() == ISD::SIGN_EXTEND || + (N0.getOpcode() == ISD::TRUNCATE && + !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ORNode, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and c1, c2) -> c1&c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + // fold (and x, -1) -> x + if (N1C && N1C->isAllOnesValue()) + return N0; + // if (and x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, VT); + // reassociate and + SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + if (RAND.getNode() != 0) + return RAND; + // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + if (N1C && N0.getOpcode() == ISD::LOAD) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && + // Do not change the width of a volatile load. + !LN0->isVolatile()) { + MVT EVT = MVT::Other; + uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) + EVT = MVT::getIntegerVT(ActiveBits); + + MVT LoadedVT = LN0->getMemoryVT(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to add an offset to the pointer to + // load the correct bytes. For little endian systems, we merely need to + // read fewer bytes from the same pointer. + unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; + unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + if (TLI.isBigEndian()) { + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorkList(NewPtr.getNode()); + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), + NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), + EVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
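The (and (load x), 255) narrowing above adjusts the pointer on big-endian targets because the least-significant byte of a wide value is stored last; PtrOff is the wide store size minus the narrow store size. A host-independent sketch of that offset arithmetic, assuming a 4-byte value narrowed to 1 byte:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Value = 0x11223344u;
  const uint8_t BE[4] = {0x11, 0x22, 0x33, 0x44};   // big-endian memory image
  const uint8_t LE[4] = {0x44, 0x33, 0x22, 0x11};   // little-endian memory image
  unsigned LVTStoreBytes = 4, EVTStoreBytes = 1;
  unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;  // 3
  // Big-endian: load the single byte at BasePtr + PtrOff.
  assert(BE[PtrOff] == (Value & 0xffu));
  // Little-endian: the same byte already sits at offset 0.
  assert(LE[0] == (Value & 0xffu));
  return 0;
}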
+ } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LL, LR, RL, RR, CC0, CC1; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (or x, undef) -> -1 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(~0ULL, VT); + // fold (or c1, c2) -> c1|c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + // fold (or x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (or x, -1) -> -1 + if (N1C && N1C->isAllOnesValue()) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + // reassociate or + SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + if (ROR.getNode() != 0) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ORNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + LR.getValueType(), LL, RL); + AddToWorkList(ANDNode.getNode()); + return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + LL, LR, Result); + } + } + + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // (or (and X, C1), (and Y, C2)) -> 
(and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + return SDValue(Rot, 0); + + return SDValue(); +} + +/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isa<ConstantSDNode>(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + MVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return 0; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return 0; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return 0; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return 0; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return 0; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return 0; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. 
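MatchRotate looks for the classic OR-of-opposed-shifts idiom; in the constant case handled below, the two shift amounts must sum to the operand width. A sketch of the pattern it converts into ROTL/ROTR, assuming 32-bit unsigned values and a constant 0 < c < 32 (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// (or (shl x, c), (srl x, 32 - c)) is a rotate left by c.
uint32_t rotl_idiom(uint32_t x, unsigned c) {
  return (x << c) | (x >> (32 - c));
}

int main() {
  uint32_t x = 0x80000001u;
  assert(rotl_idiom(x, 1) == 0x00000003u);
  assert(rotl_idiom(x, 8) == 0x00000180u);
  return 0;
}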
+ if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask , RHSMask ); + } + + unsigned OpSizeInBits = VT.getSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (LHSShiftAmt.getOpcode() == ISD::Constant && + RHSShiftAmt.getOpcode() == ISD::Constant) { + uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != OpSizeInBits) + return 0; + + SDValue Rot; + if (HasROTL) + Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); + else + Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. + if (LHSMask.getNode() || RHSMask.getNode()) { + APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); + Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); + Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return 0; + + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) + // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) + if (RHSShiftAmt.getOpcode() == ISD::SUB && + LHSShiftAmt == RHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTL) + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + } + } + } + + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) + // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) + if (LHSShiftAmt.getOpcode() == ISD::SUB && + RHSShiftAmt == LHSShiftAmt.getOperand(1)) { + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + if (HasROTR) + return DAG.getNode(ISD::ROTR, DL, VT, + LHSShiftArg, RHSShiftAmt).getNode(); + else + return DAG.getNode(ISD::ROTL, DL, VT, + LHSShiftArg, LHSShiftAmt).getNode(); + } + } + } + + // Look for sign/zext/any-extended or truncate cases: + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND + || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND + || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + SDValue LExtOp0 = LHSShiftAmt.getOperand(0); + SDValue RExtOp0 = RHSShiftAmt.getOperand(0); + if (RExtOp0.getOpcode() == ISD::SUB && + RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext 
(sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } + } + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, + LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } + } + } + } + + return 0; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue LHS, RHS, CC; + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold (xor x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // reassociate xor + SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + if (RXOR.getNode() != 0) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + switch (N0.getOpcode()) { + default: + assert(0 && "Unhandled SetCC Equivalent!"); + abort(); + case ISD::SETCC: + return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (N1C && N1C->isAllOnesValue() && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + } + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N00C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), VT)); + if (N01C) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), VT)); + } + // fold (xor x, x) -> 0 + if (N0 == N1) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; + } + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. 
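visitShiftByConstant, begun above, hoists a binary operator out from under a shift so that address-style computations canonicalize as (binop (shift)) rather than (shift (binop)). The SHL cases rely on the shift distributing over the operator, e.g. assuming 32-bit unsigned arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0x1234u, 0xffffffffu}) {
    uint32_t c = 8, s = 2;
    // (shl (add x, c), s) == (add (shl x, s), (shl c, s))
    assert(((x + c) << s) == ((x << s) + (c << s)));
    // The same distribution holds for OR/XOR/AND, subject to the sign-bit
    // restrictions noted above when the shift is an SRA.
    assert(((x | c) << s) == ((x << s) | (c << s)));
  }
  return 0;
}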
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. + // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + MVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. + if (N->getOpcode() == ISD::SRA) { + bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); + if (BinOpRHSSignSet != HighBitSet) + return SDValue(); + } + + // Fold the constants, shifting the binop RHS by the shift amount. + SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + + // Create the new shift. + SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + VT, LHS->getOperand(0), N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (shl c1, c2) -> c1<<c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + // fold (shl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(VT.getSizeInBits()))) + return DAG.getConstant(0, VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
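visitSHL also composes back-to-back shifts: two left shifts add their amounts, and a right shift followed by a left shift of the same amount only clears low bits. The identities involved, assuming 32-bit unsigned values and in-range shift amounts:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xdeadbeefu;
  // (shl (shl x, c1), c2) -> (shl x, c1 + c2) while c1 + c2 stays in range
  assert(((x << 3) << 4) == (x << 7));
  // (shl (srl x, c1), c1) only clears the low c1 bits, i.e. (and x, ~0u << c1).
  assert(((x >> 5) << 5) == (x & (~0u << 5)));
  return 0;
}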
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SHL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or + // (srl (and x, (shl -1, c1)), (sub c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << c1, VT)); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + + // fold (sra c1, c2) -> (sra c1, c2) + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + // fold (sra 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (sra -1, x) -> -1 + if (N0C && N0C->isAllOnesValue()) + return N0; + // fold (sra x, (setge c, size(x))) -> undef + if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + return DAG.getUNDEF(VT); + // fold (sra x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports + // sext_inreg. 
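Shifting left and then arithmetically right by the same amount is how sign-extension from a narrower in-register type is usually spelled, which is what the SIGN_EXTEND_INREG fold below recognizes. A check for the i8-in-i32 case, assuming two's-complement narrowing and an arithmetic right shift on signed values:

#include <cassert>
#include <cstdint>

int main() {
  // (sra (shl x, 24), 24) sign-extends the low 8 bits of x: sext_inreg x, i8.
  for (int32_t x : {0x7f, 0x80, 0xff, 0x1234567f}) {
    int32_t InReg = int32_t(uint32_t(x) << 24) >> 24;
    assert(InReg == int32_t(int8_t(x & 0xff)));
  }
  return 0;
}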
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { + unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); + MVT EVT = MVT::getIntegerVT(LowBits); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), DAG.getValueType(EVT)); + } + + // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRA) { + if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); + if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(Sum, N1C->getValueType(0))); + } + } + + // fold (sra (shl X, m), (sub result_size, n)) + // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for + // result_size - n != m. + // If truncate is free for the target sext(shl) is likely to result in better + // code. + if (N0.getOpcode() == ISD::SHL) { + // Get the two constanst of the shifts, CN0 = m, CN = n. + const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N01C && N1C) { + // Determine what the truncate's result bitsize and type would be. + unsigned VTValSize = VT.getSizeInBits(); + MVT TruncVT = + MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + // Determine the residual right-shift amount. + signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + + // If the shift is not a no-op (in which case this should be just a sign + // extend already), the truncated to type is legal, sign_extend is legal + // on that type, and the the truncate to that type is both legal and free, + // perform the transform. + if ((ShiftAmt > 0) && + TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && + TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && + TLI.isTruncateFree(VT, TruncVT)) { + + SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy()); + SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), + N->getValueType(0), Trunc); + } + } + } + + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // Simplify, based on bits shifted out of the LHS. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + + // If the sign bit is known to be zero, switch this to a SRL. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + + return N1C ? 
visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + MVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getSizeInBits(); + + // fold (srl c1, c2) -> c1 >>u c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + // fold (srl 0, x) -> 0 + if (N0C && N0C->isNullValue()) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL && + N0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 > OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + MVT SmallVT = N0.getOperand(0).getValueType(); + if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + return DAG.getUNDEF(VT); + + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign + // bit, which is unmodified by sra. + if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N0.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + } + + // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). + if (N1C && N0.getOpcode() == ISD::CTLZ && + N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + + // If any of the input bits are KnownOne, then the input couldn't be all + // zeros, thus the result of the srl will always be zero. + if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + + // If all of the bits input the to ctlz node are known to be zero, then + // the result of the ctlz is "32" and the result of the shift is one. + APInt UnknownBits = ~KnownZero & Mask; + if (UnknownBits == 0) return DAG.getConstant(1, VT); + + // Otherwise, check to see if there is exactly one bit input to the ctlz. + if ((UnknownBits & (UnknownBits - 1)) == 0) { + // Okay, we know that only that the single bit specified by UnknownBits + // could be set on input to the CTLZ node. If this bit is set, the SRL + // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair + // to an SRL/XOR pair, which is likely to simplify more. 
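Concretely, for an i32 value ctlz returns 32 only for zero, so (srl (ctlz x), 5) computes x == 0; and when x is known to be either 0 or a single fixed bit (1 << k), that reduces to (xor (srl x, k), 1), which is the SRL/XOR rewrite performed below. A sketch, assuming 32-bit values and a clz32 helper defined here for illustration:

#include <cassert>
#include <cstdint>

// Portable count-leading-zeros with clz32(0) == 32, like ISD::CTLZ on i32.
unsigned clz32(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & 0x80000000u)) { x <<= 1; ++n; }
  return n;
}

int main() {
  // (srl (ctlz x), 5) is 1 exactly when x == 0.
  assert((clz32(0) >> 5) == 1u);
  assert((clz32(1) >> 5) == 0u);
  // If x can only be 0 or (1 << k), the CTLZ/SRL pair is (xor (srl x, k), 1).
  for (unsigned k = 0; k < 32; ++k)
    for (uint32_t x : {0u, 1u << k})
      assert((clz32(x) >> 5) == ((x >> k) ^ 1u));
  return 0;
}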
+ unsigned ShAmt = UnknownBits.countTrailingZeros(); + SDValue Op = N0.getOperand(0); + + if (ShAmt) { + Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + DAG.getConstant(ShAmt, getShiftAmountTy())); + AddToWorkList(Op.getNode()); + } + + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + Op, DAG.getConstant(1, VT)); + } + } + + // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND && + N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { + SDValue N101 = N1.getOperand(0).getOperand(1); + if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { + MVT TruncVT = N1.getValueType(); + SDValue N100 = N1.getOperand(0).getOperand(0); + APInt TruncC = N101C->getAPIntValue(); + TruncC.trunc(TruncVT.getSizeInBits()); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::AND, N->getDebugLoc(), + TruncVT, + DAG.getNode(ISD::TRUNCATE, + N->getDebugLoc(), + TruncVT, N100), + DAG.getConstant(TruncC, TruncVT))); + } + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + MVT VT = N->getValueType(0); + MVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + // fold (select true, X, Y) -> X + if (N0C && !N0C->isNullValue()) + return N1; + // fold (select false, X, Y) -> Y + if (N0C && N0C->isNullValue()) + return N2; + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + if (VT.isInteger() && + (VT0 == MVT::i1 || + (VT0.isInteger() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) && + N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + SDValue XORNode; + if (VT == VT0) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + N0, DAG.getConstant(1, VT0)); + AddToWorkList(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 
&& N1C && N1C->isNullValue()) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + AddToWorkList(NOTNode.getNode()); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && N2C && N2C->isNullValue()) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or X, Y) + if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + // fold (select X, Y, X) -> (and X, Y) + // fold (select X, Y, 0) -> (and X, Y) + if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // FIXME: + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue N4 = N->getOperand(4); + ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + + // fold select_cc lhs, rhs, x, x, cc -> x + if (N2 == N3) + return N2; + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, N->getDebugLoc(), false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } + + // Fold to a simpler select_cc + if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N2, N3)) + return SDValue(N, 0); // Don't revisit N. + + // fold select_cc into other things, such as min/max/abs + return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); +} + +SDValue DAGCombiner::visitSETCC(SDNode *N) { + return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), + N->getDebugLoc()); +} + +// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" +// transformation. 
Returns true if extension are possible and the above +// mentioned transformation is profitable. +static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, + unsigned ExtOpc, + SmallVector<SDNode*, 4> &ExtendNodes, + const TargetLowering &TLI) { + bool HasCopyToRegUses = false; + bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User == N) + continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; + // FIXME: Only extend SETCC N, N and SETCC N, c for now. + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); + if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) + // Sign bits will be lost after a zext. + return false; + bool Add = false; + for (unsigned i = 0; i != 2; ++i) { + SDValue UseOp = User->getOperand(i); + if (UseOp == N0) + continue; + if (!isa<ConstantSDNode>(UseOp)) + return false; + Add = true; + } + if (Add) + ExtendNodes.push_back(User); + continue; + } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. + if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // good a reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + +SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (sext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + SDValue Op = N0.getOperand(0); + unsigned OpBits = Op.getValueType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getSizeInBits(); + unsigned DestBits = VT.getSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign + // bits, just sext from i32. 
+ if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (Op.getValueType().bitsLT(VT)) + Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + else if (Op.getValueType().bitsGT(VT)) + Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. 
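That final fold relies on sign- and zero-extension agreeing whenever the source's sign bit is zero; when the bit can be set they differ, so the combiner must prove it is zero first. A quick check for i8 to i32, assuming two's-complement integers:

#include <cassert>
#include <cstdint>

int main() {
  // Sign bit clear: sext and zext produce the same value.
  for (int v = 0; v <= 127; ++v)
    assert(int32_t(int8_t(v)) == int32_t(uint8_t(v)));
  // Sign bit set: they differ (-128 vs. 128), so the fold would be wrong.
  assert(int32_t(int8_t(-128)) != int32_t(uint8_t(0x80)));
  return 0;
}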
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (zext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE && + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + SDValue Op = N0.getOperand(0); + if (Op.getValueType().bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + } else if (Op.getValueType().bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + } + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. 
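+      // The SetCC nodes collected by ExtendUsesToFormExtLoad still compare
+      // the narrow value.  Rebuild each at the wide type VT: the load operand
+      // becomes the zextload itself and the other operand (a constant, per
+      // ExtendUsesToFormExtLoad) is zero-extended to match; signed compares
+      // were already rejected when the list was built, so each comparison's
+      // result is unchanged.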
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + if (NarrowLoad.getNode() != N0.getNode()) + CombineTo(N0.getNode(), NarrowLoad); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. + if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. 
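+  // E.g. with %x : i64, (aext i32->i64 (and (trunc %x to i32), 255)) becomes
+  // (and %x, 255): the low bits agree, the high bits of an any_extend are
+  // unspecified anyway, and when the truncate is not free this drops two
+  // nodes from the cross-width chain (the mask constant is zext'd to i64).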
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + X, DAG.getConstant(Mask, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT EVT = LN0->getMemoryVT(); + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + if (N0.getOpcode() == ISD::SETCC) { + SDValue SCC = + SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, VT), DAG.getConstant(0, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) + return SCC; + } + + return SDValue(); +} + +/// GetDemandedBits - See if the specified operand can be simplified with the +/// knowledge that only the bits specified by Mask are used. If so, return the +/// simpler operand, otherwise return a null SDValue. 
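+// For instance, with V = (or %a, (shl %b, 8)) and Mask = 0xff the shifted
+// operand contributes no demanded bits, so this returns %a; visitTRUNCATE
+// uses exactly that to turn (trunc (or (shl x, 8), y)) into (trunc y).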
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { + switch (V.getOpcode()) { + default: break; + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. + if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= Mask.getBitWidth()) break; + APInt NewMask = Mask << Amt; + SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); + if (SimplifyLHS.getNode()) + return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + SimplifyLHS, V.getOperand(1)); + } + } + return SDValue(); +} + +/// ReduceLoadWidth - If the result of a wider load is shifted to right of N +/// bits and then truncated to a narrower type and where N is a multiple +/// of number of bits of the narrower type, transform it to a narrower load +/// from address + N / num of bits of new type. If the result is to be +/// extended, also fold the extension to form a extending load. +SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { + unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + MVT EVT = VT; + + // This transformation isn't valid for vector loads. + if (VT.isVector()) + return SDValue(); + + // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then + // extended to VT. + if (Opc == ISD::SIGN_EXTEND_INREG) { + ExtType = ISD::SEXTLOAD; + EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + return SDValue(); + } + + unsigned EVTBits = EVT.getSizeInBits(); + unsigned ShAmt = 0; + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShAmt = N01->getZExtValue(); + // Is the shift amount a multiple of size of VT? + if ((ShAmt & (EVTBits-1)) == 0) { + N0 = N0.getOperand(0); + if (N0.getValueType().getSizeInBits() <= EVTBits) + return SDValue(); + } + } + } + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + // Do not change the width of a volatile load. + !cast<LoadSDNode>(N0)->isVolatile()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + MVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load = (ExtType == ISD::NON_EXTLOAD) + ? 
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + LN0->isVolatile(), NewAlign) + : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, + EVT, LN0->isVolatile(), NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); + + // Return the new loaded value. + return Load; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + MVT VT = N->getValueType(0); + MVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + // fold (sext_in_reg c1) -> c1 + if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. + if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + } + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getSizeInBits() < EVTBits) + return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. 
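+        // Worked example: (sext_in_reg (srl i32 X, 23), i8).  Here
+        // 32 - (23 + 8) = 1, so the rewrite to (sra X, 23) fires once X has
+        // at least two sign bits (bit 31 == bit 30); the arithmetic shift
+        // then fills the upper bits with the same value the sext_in_reg
+        // would have replicated from bit 7 of the shifted result.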
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), EVT, + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + return SDValue(); +} + +SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // noop truncate + if (N0.getValueType() == N->getValueType(0)) + return N0; + // fold (truncate c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + N0.getOpcode() == ISD::ANY_EXTEND) { + if (N0.getOperand(0).getValueType().bitsLT(VT)) + // if the source is smaller than the dest, we still need an extend + return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0)); + else if (N0.getOperand(0).getValueType().bitsGT(VT)) + // if the source is larger than the dest, than we just need the truncate + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + else + // if the source and dest are the same type, we can drop both the extend + // and the truncate + return N0.getOperand(0); + } + + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. 
For example "trunc (or (shl x, 8), y)" + // -> trunc y + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + + // fold (truncate (load x)) -> (smaller load x) + // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) + return ReduceLoadWidth(N); +} + +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDValue Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.getNode(); + return Elt.getOperand(Elt.getResNo()).getNode(); +} + +/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// if load locations are consecutive. +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + SDNode *LD1 = getBuildPairElt(N, 0); + if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + return SDValue(); + MVT LD1VT = LD1->getValueType(0); + SDNode *LD2 = getBuildPairElt(N, 1); + const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + // If both are volatile this would reduce the number of volatile loads. + // If one is volatile it might be ok, but play conservative and bail out. + !cast<LoadSDNode>(LD1)->isVolatile() && + !cast<LoadSDNode>(LD2)->isVolatile() && + TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + LoadSDNode *LD = cast<LoadSDNode>(LD1); + unsigned Align = LD->getAlignment(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + false, Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. + if (!LegalTypes && + N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + VT.isVector()) { + bool isSimple = true; + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) + if (N0.getOperand(i).getOpcode() != ISD::UNDEF && + N0.getOperand(i).getOpcode() != ISD::Constant && + N0.getOperand(i).getOpcode() != ISD::ConstantFP) { + isSimple = false; + break; + } + + MVT DestEltVT = N->getValueType(0).getVectorElementType(); + assert(!DestEltVT.isVector() && + "Element type of vector ValueType must not be vector!"); + if (isSimple) + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + } + + // If the input is a constant, let getNode fold it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { + SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + if (Res.getNode() != N) return Res; + } + + // (conv (conv x, t1), t2) -> (conv x, t2) + if (N0.getOpcode() == ISD::BIT_CONVERT) + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + N0.getOperand(0)); + + // fold (conv (load x)) -> (load (conv*)x) + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + // Do not change the width of a volatile load. 
+ !cast<LoadSDNode>(N0)->isVolatile() && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForMVT()); + unsigned OrigAlign = LN0->getAlignment(); + + if (Align <= OrigAlign) { + SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), OrigAlign); + AddToWorkList(N); + CombineTo(N0.getNode(), + DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + N0.getValueType(), Load), + Load.getValue(1)); + return Load; + } + } + + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // This often reduces constant pool loads. + if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { + SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + N0.getOperand(0)); + AddToWorkList(NewConv.getNode()); + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(SignBit, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + NewConv, DAG.getConstant(~SignBit, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + IntXVT, N0.getOperand(1)); + AddToWorkList(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. + X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + AddToWorkList(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + AddToWorkList(X.getNode()); + } + + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X, DAG.getConstant(SignBit, VT)); + AddToWorkList(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst, DAG.getConstant(~SignBit, VT)); + AddToWorkList(Cst.getNode()); + + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
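+  // E.g. an f64 built as (bit_convert (build_pair (load i32 [p]),
+  // (load i32 [p+4]))) can, on a little-endian target where both halves are
+  // non-volatile and provably adjacent, be replaced by one f64 load of [p],
+  // provided the original alignment already satisfies the wider type's ABI
+  // alignment.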
+ if (N0.getOpcode() == ISD::BUILD_PAIR) { + SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); + if (CombineLD.getNode()) + return CombineLD; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { + MVT VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + +/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the +/// destination element value type. +SDValue DAGCombiner:: +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { + MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDValue Op = BV->getOperand(i); + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + DstEltVT, Op)); + AddToWorkList(Ops.back().getNode()); + } + MVT VT = MVT::getVectorVT(DstEltVT, + BV->getValueType(0).getVectorNumElements()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (SrcEltVT.isFloatingPoint()) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); + MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (DstEltVT.isFloatingPoint()) { + assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); + MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + + // Next, convert to FP elements of the same size. + return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + } + + // Okay, we know the src/dst types are both integers of differing types. + // Handling growing first. + assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = TLI.isLittleEndian(); + APInt NewBits = APInt(DstBitSize, 0); + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDValue Op = BV->getOperand(i+ (isLE ? 
(NumInputsPerOutput-j-1) : j)); + if (Op.getOpcode() == ISD::UNDEF) continue; + EltIsUndef = false; + + NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize)); + } + + if (EltIsUndef) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + } + + MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + bool isS2V = ISD::isScalarToVector(BV); + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + SmallVector<SDValue, 8> Ops; + + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (unsigned j = 0; j != NumOutputsPerInput; ++j) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + continue; + } + + APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue()).zextOrTrunc(SrcBitSize); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + APInt ThisVal = APInt(OpVal).trunc(DstBitSize); + Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); + if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) + // Simply turn this into a SCALAR_TO_VECTOR of the new type. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + Ops[0]); + OpVal = OpVal.lshr(DstBitSize); + } + + // For big endian targets, swap the order of the pieces of each element. + if (TLI.isBigEndian()) + std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + } + + return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + &Ops[0], Ops.size()); +} + +SDValue DAGCombiner::visitFADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fadd c1, c2) -> (fadd c1, c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + // fold (fadd A, 0) -> A + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fadd A, (fneg B)) -> (fsub A, B) + if (isNegatibleForFree(N1, LegalOperations) == 2) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + // fold (fadd (fneg A), B) -> (fsub B, A) + if (isNegatibleForFree(N0, LegalOperations) == 2) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + GetNegatedExpression(N0, DAG, LegalOperations)); + + // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + 
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + // fold (fsub A, 0) -> A + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + // fold (fsub 0, B) -> -B + if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + // fold (fmul A, 0) -> 0 + if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + // fold (fmul X, (fneg 1.0)) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + + // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
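+      // isNegatibleForFree returns a small cost code; 2 appears to mean the
+      // negated form is strictly cheaper (e.g. the operand is itself an FNEG,
+      // so negating merely strips a node).  Only in that case is it worth
+      // rewriting the multiply with both operands negated.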
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), N1)); + + return SDValue(); +} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + MVT VT = N->getValueType(0); + + if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, 
N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (sint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + MVT VT = N->getValueType(0); + MVT OpVT = N0.getValueType(); + + // fold (uint_to_fp c1) -> c1fp + if (N0C && OpVT != MVT::ppcf128) + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + + // If the input is a legal type, and UINT_TO_FP is not legal on this target, + // but SINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to SINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_sint c1fp) -> c1 + if (N0CFP) + return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_to_uint c1fp) -> c1 + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fp_round c1fp) -> c1fp + if (N0CFP && N0.getValueType() != MVT::ppcf128) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + + // fold (fp_round (fp_extend x)) -> x + if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) + return N0.getOperand(0); + + // fold (fp_round (fp_round x)) -> (fp_round x) + if (N0.getOpcode() == ISD::FP_ROUND) { + // This is a value preserving truncation if both round's are. 
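+    // The extra integer operand of FP_ROUND is a flag: by convention 1
+    // asserts the rounding loses no information.  The merged round may keep
+    // that assertion only if both of the original rounds carried it, which
+    // is what the && below computes.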
+ bool IsTrunc = N->getConstantOperandVal(1) == 1 && + N0.getNode()->getConstantOperandVal(1) == 1; + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getIntPtrConstant(IsTrunc)); + } + + // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + N0.getOperand(0), N1); + AddToWorkList(Tmp.getNode()); + return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + Tmp, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + MVT VT = N->getValueType(0); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + + // fold (fp_round_inreg c1fp) -> c1fp + if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fp_extend c1fp) -> c1fp + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the + // value of X. + if (N0.getOpcode() == ISD::FP_ROUND + && N0.getNode()->getConstantOperandVal(1) == 1) { + SDValue In = N0.getOperand(0); + if (In.getValueType() == VT) return In; + if (VT.bitsLT(In.getValueType())) + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + } + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFNEG(SDNode *N) { + SDValue N0 = N->getOperand(0); + + if (isNegatibleForFree(N0, LegalOperations)) + return GetNegatedExpression(N0, DAG, LegalOperations); + + // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // constant pool values. 
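+  // E.g. for an f64 whose bits sit in an i64, XORing with
+  // APInt::getSignBit(64) = 0x8000000000000000 flips only the IEEE sign bit,
+  // so the negation needs no -0.0 (or similar) constant-pool load at all.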
+ if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + MVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // constant pool values. + if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger() && + !N0.getOperand(0).getValueType().isVector()) { + SDValue Int = N0.getOperand(0); + MVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + AddToWorkList(Int.getNode()); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + + // never taken branch, fold to chain + if (N1C && N1C->isNullValue()) + return Chain; + // unconditional branch + if (N1C && N1C->getAPIntValue() == 1) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. + if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { + // Match this pattern so that we can generate simpler code: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // This applies only when the AND constant value has one bit set and the + // SRL constant is equal to the log2 of the AND constant. The back-end is + // smart enough to convert the result into a TEST/JMP sequence. 
+ SDValue Op0 = N1.getOperand(0); + SDValue Op1 = N1.getOperand(1); + + if (Op0.getOpcode() == ISD::AND && + Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant) { + SDValue AndOp0 = Op0.getOperand(0); + SDValue AndOp1 = Op0.getOperand(1); + + if (AndOp1.getOpcode() == ISD::Constant) { + const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + + if (AndConst.isPowerOf2() && + cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDValue SetCC = + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETNE); + + // Replace the uses of SRL with SETCC + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. +// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + false); + if (Simp.getNode()) AddToWorkList(Simp.getNode()); + + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode()); + + // fold br_cc true, dest -> br dest (unconditional branch) + if (SCCC && !SCCC->isNullValue()) + return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, + N->getOperand(0), N->getOperand(4)); + // fold br_cc false, dest -> unconditional fall through + if (SCCC && SCCC->isNullValue()) + return N->getOperand(0); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// CombineToPreIndexedLoadStore - Try turning a load / store into a +/// pre-indexed load / store when the base pointer is an add or subtract +/// and it has other uses besides the load / store. After the +/// transformation, the new indexed load / store has effectively folded +/// the add / subtract in and all of its other uses are redirected to the +/// new load / store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. + if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. 
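+  // getPreIndexedAddressParts splits the access into a base and an offset
+  // and reports whether the target can apply that offset to the base as part
+  // of the memory operation (PRE_INC / PRE_DEC).  Targets with auto-update
+  // addressing (for instance ARM's "ldr r0, [r1, #4]!") are the intended
+  // beneficiaries of this combine.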
+ SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // Now check for #3 and #4. + bool RealUse = false; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == N) + continue; + if (Use->isPredecessorOf(N)) + return false; + + if (!((Use->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || + (Use->getOpcode() == ISD::STORE && + cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Ptr.getNode()); + DAG.DeleteNode(Ptr.getNode()); + + return true; +} + +/// CombineToPostIndexedLoadStore - Try to combine a load / store with a +/// add / sub of the base pointer node into a post-indexed load / store. +/// The transformation folded the add / subtract into the new indexed +/// load / store effectively and all of its uses are redirected to the +/// new load / store. 
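+// Post-indexing is the complementary pattern: the access uses the old base
+// and the pointer is updated afterwards (e.g. ARM's "ldr r0, [r1], #4"), so
+// here the folded add / sub is a separate *user* of the load / store's base
+// pointer rather than the pointer operand itself.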
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (!LegalOperations) + return false; + + bool isLoad = true; + SDValue Ptr; + MVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + if (Ptr.getNode()->hasOneUse()) + return false; + + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), + E = Ptr.getNode()->use_end(); I != E; ++I) { + SDNode *Op = *I; + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + if (Ptr == Offset) + std::swap(BasePtr, Offset); + if (Ptr != BasePtr) + continue; + // Don't create a indexed load / store with zero offset. + if (isa<ConstantSDNode>(Offset) && + cast<ConstantSDNode>(Offset)->isNullValue()) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr. + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + continue; + + // Check for #1. + bool TryNext = false; + for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), + EE = BasePtr.getNode()->use_end(); II != EE; ++II) { + SDNode *Use = *II; + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode::use_iterator III = Use->use_begin(), + EEE = Use->use_end(); III != EEE; ++III) { + SDNode *UseUse = *III; + if (!((UseUse->getOpcode() == ISD::LOAD && + cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || + (UseUse->getOpcode() == ISD::STORE && + cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); + DOUT << '\n'; + WorkListRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), + &DeadNodes); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), + &DeadNodes); + } + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + + // Replace the uses of Use with uses of the updated base value. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0), + &DeadNodes); + removeFromWorkList(Op); + DAG.DeleteNode(Op); + return true; + } + } + } + + return false; +} + +/// InferAlignment - If we can infer some alignment information from this +/// pointer, return it. +static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return std::max(Align, FIInfoAlign); + } + } + + return 0; +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), Align); + } + } + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (N->hasNUsesOfValue(0, 0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. 
+ DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); + DOUT << "\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); + + if (N->use_empty()) { + removeFromWorkList(N); + DAG.DeleteNode(N); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. + assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); + DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); + DOUT << " and 2 other values\n"; + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1)), + &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. + // TODO: Handle TRUNCSTORE/LOADEXT + if (LD->getExtensionType() == ISD::NON_EXTLOAD && + !LD->isVolatile()) { + if (ISD::isNON_TRUNCStore(Chain.getNode())) { + StoreSDNode *PrevST = cast<StoreSDNode>(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplLoad; + + // Replace the chain to void dependency. + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), + BetterChain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), + LD->getAlignment()); + } + + // Create token factor to keep old chain connected. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplLoad.getValue(1)); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + return SDValue(); +} + + +/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is +/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some +/// of the loaded bits, try narrowing the load and store if it would end up +/// being a win for performance or code size. 
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + MVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LD = cast<LoadSDNode>(N0); + if (LD->getBasePtr() != Ptr) + return SDValue(); + + // Find the type to narrow it the load / op / store to. + SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + MVT NewVT = MVT::getIntegerVT(NewBW); + while (NewBW < BitWidth && + !(TLI.isOperationLegalOrCustom(Opc, NewVT) && + TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = MVT::getIntegerVT(NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + if (NewAlign < + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + LD->getChain(), NewPtr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), NewAlign); + SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + DAG.getConstant(NewImm, NewVT)); + SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + NewVal, NewPtr, + ST->getSrcValue(), ST->getSrcValueOffset(), + false, NewAlign); + + AddToWorkList(NewPtr.getNode()); + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewVal.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // Try to infer better alignment information than the store already has. 
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), Align); + } + } + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + MVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = TLI.getTargetData()-> + getABITypeAlignment(SVT.getTypeForMVT()); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + if (Value.getOpcode() != ISD::TargetConstantFP) { + SDValue Tmp; + switch (CFP->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; + case MVT::f32: + if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), MVT::i32); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } + break; + case MVT::f64: + if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), MVT::i64); + return DAG.getStore(Chain, N->getDebugLoc(), Tmp, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->isVolatile(), + ST->getAlignment()); + } else if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. 
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), + isVolatile, ST->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SVOffset += 4; + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, + Ptr, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + St0, St1); + } + + break; + } + } + } + + if (CombinerAA) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + // Replace the chain to avoid dependency. + SDValue ReplStore; + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } else { + ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + MVT::Other, Chain, ReplStore); + + // Don't add users to work list. + return CombineTo(N, Token, false); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet(Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits())); + AddToWorkList(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueSizeInBits(), + ST->getMemoryVT().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. 
We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new + // vector with the inserted element. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa<ConstantSDNode>(EltNo)) { + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT)); + + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + MVT EltVT = InVec.getValueType().getVectorElementType(); + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + return InOp; + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. 
+ if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + SDValue EltNo = N->getOperand(1); + + if (isa<ConstantSDNode>(EltNo)) { + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + MVT VT = InVec.getValueType(); + MVT EVT = VT.getVectorElementType(); + MVT LVT = EVT; + + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + EVT = BCVT.getVectorElementType(); + NewLoad = true; + } + + LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BIT_CONVERT) + InVec = InVec.getOperand(0); + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + } + } + + if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + return SDValue(); + + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + if (Elt) { + unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + MVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + MVT VT = N->getValueType(0); + MVT EltType = VT.getVectorElementType(); + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + SDValue VecIn1, VecIn2; + for (unsigned i = 0; i != NumInScalars; ++i) { + // Ignore undef inputs. 
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + // If this input is something other than a EXTRACT_VECTOR_ELT with a + // constant index, bail out. + if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // If the input vector type disagrees with the result of the build_vector, + // we can't make a shuffle. + SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + if (ExtractedFromVec.getValueType() != VT) { + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + + // Otherwise, remember this. We allow up to two distinct input vectors. + if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) + continue; + + if (VecIn1.getNode() == 0) { + VecIn1 = ExtractedFromVec; + } else if (VecIn2.getNode() == 0) { + VecIn2 = ExtractedFromVec; + } else { + // Too many inputs. + VecIn1 = VecIn2 = SDValue(0, 0); + break; + } + } + + // If everything is good, we can make a shuffle operation. + if (VecIn1.getNode()) { + SmallVector<int, 8> Mask; + for (unsigned i = 0; i != NumInScalars; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); + SDValue ExtVal = Extract.getOperand(1); + if (Extract.getOperand(0) == VecIn1) { + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); + if (ExtIndex > VT.getVectorNumElements()) + return SDValue(); + + Mask.push_back(ExtIndex); + continue; + } + + // Otherwise, use InIdx + VecSize + unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); + Mask.push_back(Idx+NumInScalars); + } + + // Add count and size info. + if (!TLI.isTypeLegal(VT) && LegalTypes) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + SDValue Ops[2]; + Ops[0] = VecIn1; + Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of + // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector + // inputs come from at most two distinct vectors, turn this into a shuffle + // node. + + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + return SDValue(); +} + +SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { + return SDValue(); + + MVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType().getVectorNumElements() == NumElts && + "Vector shuffle must be normalized in DAG"); + + // FIXME: implement canonicalizations from DAG.getVectorShuffle() + + // If it is a splat, check if the argument vector is a build_vector with + // all scalar elements the same. + if (cast<ShuffleVectorSDNode>(N)->isSplat()) { + SDNode *V = N0.getNode(); + + + // If this is a bit convert that changes the element type of the vector but + // not the number of vector elements, look through it. Be careful not to + // look though conversions that change things like v4f32 to v2f64. 
+ if (V->getOpcode() == ISD::BIT_CONVERT) { + SDValue ConvInput = V->getOperand(0); + if (ConvInput.getValueType().isVector() && + ConvInput.getValueType().getVectorNumElements() == NumElts) + V = ConvInput.getNode(); + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElems = V->getNumOperands(); + unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); + if (NumElems > BaseIdx) { + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElems; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + } + } + } + return SDValue(); +} + +/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform +/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> +/// vector_shuffle V, Zero, <0, 4, 2, 4> +SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (N->getOpcode() == ISD::AND) { + if (RHS.getOpcode() == ISD::BIT_CONVERT) + RHS = RHS.getOperand(0); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) + return SDValue(); + else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + Indices.push_back(NumElts); + else + return SDValue(); + } + + // Let's see if the target supports this vector_shuffle. + MVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + MVT EVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + } + } + + return SDValue(); +} + +/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { + // After legalize, the target may be depending on adds and other + // binary ops to provide legal ways to construct constants or other + // things. Simplifying them may result in a loss of legality. + if (LegalOperations) return SDValue(); + + MVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); + + MVT EltType = VT.getVectorElementType(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Shuffle = XformToShuffleWithZero(N); + if (Shuffle.getNode()) return Shuffle; + + // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold + // this operation. 
+ if (LHS.getOpcode() == ISD::BUILD_VECTOR && + RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + // If these two elements can't be folded, bail out. + if ((LHSOp.getOpcode() != ISD::UNDEF && + LHSOp.getOpcode() != ISD::Constant && + LHSOp.getOpcode() != ISD::ConstantFP) || + (RHSOp.getOpcode() != ISD::UNDEF && + RHSOp.getOpcode() != ISD::Constant && + RHSOp.getOpcode() != ISD::ConstantFP)) + break; + + // Can't fold divide by zero. + if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || + N->getOpcode() == ISD::FDIV) { + if ((RHSOp.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || + (RHSOp.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) + break; + } + + Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), + EltType, LHSOp, RHSOp)); + AddToWorkList(Ops.back().getNode()); + assert((Ops.back().getOpcode() == ISD::UNDEF || + Ops.back().getOpcode() == ISD::Constant || + Ops.back().getOpcode() == ISD::ConstantFP) && + "Scalar binop didn't fold!"); + } + + if (Ops.size() == LHS.getNumOperands()) { + MVT VT = LHS.getValueType(); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &Ops[0], Ops.size()); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorkList(SETCC.getNode()); + return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); + } + + return SCC; + } + return SDValue(); +} + +/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS +/// are the two values being selected between, see if we can simplify the +/// select. Callers of this should assume that TheSelect is deleted if this +/// returns true. As such, they should return the appropriate thing (e.g. the +/// node) back to the top-level of the DAG combiner loop to avoid it being +/// looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. 
+ if (LHS.getOpcode() == ISD::LOAD && + // Do not let this transformation reduce the number of volatile loads. + !cast<LoadSDNode>(LHS)->isVolatile() && + !cast<LoadSDNode>(RHS)->isVolatile() && + // Token chains must be identical. + LHS.getOperand(0) == RHS.getOperand(0)) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // If this is an EXTLOAD, the VT's must match. + if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + // FIXME: this conflates two src values, discarding one. This is not + // the right thing to do, but nothing uses srcvalues now. When they do, + // turn SrcValue into a list of locations. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + } + + if (Addr.getNode()) { + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + LLD->getChain(), + Addr,LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->isVolatile(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + LLD->getChain(), Addr, LLD->getSrcValue(), + LLD->getSrcValueOffset(), + LLD->getMemoryVT(), + LLD->isVolatile(), + LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + } + } + } + + return false; +} + +/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. 
+ if (N2 == N3) return N2; + + MVT VT = N2.getValueType(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC, DL, false); + if (SCC.getNode()) AddToWorkList(SCC.getNode()); + ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); + + // fold select_cc true, x, y -> x + if (SCCC && !SCCC->isNullValue()) + return N2; + // fold select_cc false, x, y -> y + if (SCCC && SCCC->isNullValue()) + return N3; + + // Check to see if we can simplify the select into an fabs node + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { + // Allow either -0.0 or 0.0 + if (CFP->getValueAPF().isZero()) { + // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs + if ((CC == ISD::SETGE || CC == ISD::SETGT) && + N0 == N2 && N3.getOpcode() == ISD::FNEG && + N2 == N3.getOperand(0)) + return DAG.getNode(ISD::FABS, DL, VT, N0); + + // select (setl[te] X, +/-0.0), fneg(X), X -> fabs + if ((CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::FNEG && + N2.getOperand(0) == N3) + return DAG.getNode(ISD::FABS, DL, VT, N3); + } + } + + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" + // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 + // in it. This is a win when the constant is not otherwise available because + // it replaces two constant pool loads with one. We only do this if the FP + // type is known to be legal, because if it isn't, then we are before legalize + // types an we want the other legalization to happen first (e.g. to avoid + // messing with soft float) and if the ConstantFP is not legal, because if + // it is legal, we may not need to store the FP constant in a constant pool. + if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) + if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { + if (TLI.isTypeLegal(N2.getValueType()) && + (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != + TargetLowering::Legal) && + // If both constants have multiple uses, then we won't need to do an + // extra load, they are likely around in registers for other users. + (TV->hasOneUse() || FV->hasOneUse())) { + Constant *Elts[] = { + const_cast<ConstantFP*>(FV->getConstantFPValue()), + const_cast<ConstantFP*>(TV->getConstantFPValue()) + }; + const Type *FPTy = Elts[0]->getType(); + const TargetData &TD = *TLI.getTargetData(); + + // Create a ConstantArray of the two constants. + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); + SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), + TD.getPrefTypeAlignment(FPTy)); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + + // Get the offsets to the 0 and 1 element of the array so that we can + // select between them. 
+ SDValue Zero = DAG.getIntPtrConstant(0); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); + SDValue One = DAG.getIntPtrConstant(EltSize); + + SDValue Cond = DAG.getSetCC(DL, + TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), + Cond, One, Zero); + CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CstOffset); + return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, + Alignment); + + } + } + + // Check to see if we can perform the "gzip trick", transforming + // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) + if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && + N0.getValueType().isInteger() && + N2.getValueType().isInteger() && + (N1C->isNullValue() || // (a < 0) ? b : 0 + (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + MVT XType = N0.getValueType(); + MVT AType = N2.getValueType(); + if (XType.bitsGE(AType)) { + // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a + // single-bit constant. + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + unsigned ShCtV = N2C->getAPIntValue().logBase2(); + ShCtV = XType.getSizeInBits()-ShCtV-1; + SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy()); + SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), + XType, N0, ShCt); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), + XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + AddToWorkList(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorkList(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + } + + // fold select C, 16, 0 -> shl C, 4 + if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->getAPIntValue() == 1) + return SDValue(); + + // Get a SetCC of the condition + // FIXME: Should probably make sure that setcc is legal if we ever have a + // target where it isn't. + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } + + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); + + if (N2C->getAPIntValue() == 1) + return Temp; + + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy())); + } + + // Check to see if this is the equivalent of setcc + // FIXME: Turn all of these into setcc if setcc if setcc is legal + // otherwise, go ahead with the folds. 
+ if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + MVT XType = N0.getValueType(); + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + if (Res.getValueType() != VT) + Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); + return Res; + } + + // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) + if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + (!LegalOperations || + TLI.isOperationLegal(ISD::CTLZ, XType))) { + SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + return DAG.getNode(ISD::SRL, DL, XType, Ctlz, + DAG.getConstant(Log2_32(XType.getSizeInBits()), + getShiftAmountTy())); + } + // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) + if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { + SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + XType, DAG.getConstant(0, XType), N0); + SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + return DAG.getNode(ISD::SRL, DL, XType, + DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + } + // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { + SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + } + } + + // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && + N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + MVT XType = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, + N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { + if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { + MVT XType = N0.getValueType(); + if (SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + } + } + + return SDValue(); +} + +/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 
+SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans) { + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level == Unrestricted, false, this); + return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue DAGCombiner::BuildSDIV(SDNode *N) { + std::vector<SDNode*> Built; + SDValue S = TLI.BuildSDIV(N, DAG, &Built); + + for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); + ii != ee; ++ii) + AddToWorkList(*ii); + return S; +} + +/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue DAGCombiner::BuildUDIV(SDNode *N) { + std::vector<SDNode*> Built; + SDValue S = TLI.BuildUDIV(N, DAG, &Built); + + for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); + ii != ee; ++ii) + AddToWorkList(*ii); + return S; +} + +/// FindBaseOffset - Return true if base is known not to alias with anything +/// but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { + // Assume it is a primitive operation. + Base = Ptr; Offset = 0; + + // If it's an adding a simple constant then integrate the offset. + if (Base.getOpcode() == ISD::ADD) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { + Base = Base.getOperand(0); + Offset += C->getZExtValue(); + } + } + + // If it's any of the following then it can't alias with anything but itself. + return isa<FrameIndexSDNode>(Base) || + isa<ConstantPoolSDNode>(Base) || + isa<GlobalAddressSDNode>(Base); +} + +/// isAlias - Return true if there is any possibility that the two addresses +/// overlap. +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2) const { + // If they are the same then they must be aliases. + if (Ptr1 == Ptr2) return true; + + // Gather base node and offset information. + SDValue Base1, Base2; + int64_t Offset1, Offset2; + bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); + bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + + // If they have a same base address then... + if (Base1 == Base2) + // Check to see if the addresses overlap. + return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + + // If we know both bases then they can't alias. + if (KnownBase1 && KnownBase2) return false; + + if (CombinerGlobalAA) { + // Use alias analysis information. + int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); + int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; + int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + AliasAnalysis::AliasResult AAResult = + AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); + if (AAResult == AliasAnalysis::NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// FindAliasInfo - Extracts the relevant alias information from the memory +/// node. Returns true if the operand was a load. 
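
The BuildSDIV/BuildUDIV helpers above replace a divide-by-constant with a multiply by a per-target magic number chosen by TLI. A self-checking sketch of the idea for the fixed divisor 3 on 32-bit signed values, using the standard construction from the reference cited above (the constant 0x55555556 belongs to this example only, not to what any particular target emits):

#include <cassert>
#include <cstdint>

// Signed divide by 3 without a divide: multiply by M = ceil(2^32 / 3) =
// 0x55555556, keep the high 32 bits of the 64-bit product (arithmetic shift
// assumed), and add 1 when the dividend is negative to get C's truncating
// quotient.
static int32_t divBy3(int32_t n) {
  int64_t prod = (int64_t)0x55555556 * n;       // multiply by the magic number
  int32_t q = (int32_t)(prod >> 32);            // high half of the product
  q += (int32_t)((uint32_t)n >> 31);            // correction for negative n
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n)
    assert(divBy3(n) == n / 3);
  assert(divBy3(INT32_MAX) == INT32_MAX / 3);
  assert(divBy3(INT32_MIN) == INT32_MIN / 3);
  return 0;
}
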
+bool DAGCombiner::FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset) const { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + Size = LD->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LD->getSrcValue(); + SrcValueOffset = LD->getSrcValueOffset(); + return true; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + Size = ST->getMemoryVT().getSizeInBits() >> 3; + SrcValue = ST->getSrcValue(); + SrcValueOffset = ST->getSrcValueOffset(); + } else { + assert(0 && "FindAliasInfo expected a memory operand"); + } + + return false; +} + +/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector<SDValue, 8> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + std::set<SDNode *> Visited; // Visited node set. + + // Get alias information for node. + SDValue Ptr; + int64_t Size = 0; + const Value *SrcValue = 0; + int SrcValueOffset = 0; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + + // Starting off. + Chains.push_back(OriginalChain); + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.back(); + Chains.pop_back(); + + // Don't bother if we've been before. + if (Visited.find(Chain.getNode()) != Visited.end()) continue; + Visited.insert(Chain.getNode()); + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. + SDValue OpPtr; + int64_t OpSize = 0; + const Value *OpSrcValue = 0; + int OpSrcValueOffset = 0; + bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, + OpSrcValue, OpSrcValueOffset); + + // If chain is alias then stop here. + if (!(IsLoad && IsOpLoad) && + isAlias(Ptr, Size, SrcValue, SrcValueOffset, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + // Clean up old chain. + AddToWorkList(Chain.getNode()); + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor, so we queue + // then up. Adding the operands to the queue (stack) in reverse order + // maintains the original order and increases the likelihood that getNode + // will find a matching token factor (CSE.) + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + // Eliminate the token factor if we can. + AddToWorkList(Chain.getNode()); + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } +} + +/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking +/// for a better chain (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + if (Aliases.size() == 0) { + // If no operands then chain to entry token. 
+ return DAG.getEntryNode(); + } else if (Aliases.size() == 1) { + // If a single operand then chain to it. We don't need to revisit it. + return Aliases[0]; + } + + // Construct a custom tailored token factor. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); + + // Make sure the old chain gets cleaned up. + if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); + + return NewChain; +} + +// SelectionDAG::Combine - This is the entry point for the file. +// +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 000000000000..6becff32176e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,1033 @@ +///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. +// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/DebugLoc.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "SelectionDAGBuild.h" +using namespace llvm; + +unsigned FastISel::getRegForValue(Value *V) { + MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + // Don't handle non-simple values in FastISel. + if (!RealVT.isSimple()) + return 0; + + // Ignore illegal types. We must do this before looking up the value + // in ValueMap because Arguments are given virtual registers regardless + // of whether FastISel can handle them. + MVT::SimpleValueType VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT)) { + // Promote MVT::i1 to a legal type though, because it's common and easy. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(VT).getSimpleVT(); + else + return 0; + } + + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominatess-use requirement enforced. + if (ValueMap.count(V)) + return ValueMap[V]; + unsigned Reg = LocalValueMap[V]; + if (Reg != 0) + return Reg; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) { + Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); + } else if (isa<ConstantPointerNull>(V)) { + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. + Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType())); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + + if (!Reg) { + const APFloat &Flt = CF->getValueAPF(); + MVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (isExact) { + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal)); + if (IntegerReg != 0) + Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); + } + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (!SelectOperator(CE, CE->getOpcode())) return 0; + Reg = LocalValueMap[CE]; + } else if (isa<UndefValue>(V)) { + Reg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg); + } + + // If target-independent code couldn't handle the value, give target-specific + // code a try. + if (!Reg && isa<Constant>(V)) + Reg = TargetMaterializeConstant(cast<Constant>(V)); + + // Don't cache constant materializations in the general ValueMap. + // To do so would require tracking what uses they dominate. 
+ if (Reg != 0) + LocalValueMap[V] = Reg; + return Reg; +} + +unsigned FastISel::lookUpRegForValue(Value *V) { + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominatess-use requirement enforced. + if (ValueMap.count(V)) + return ValueMap[V]; + return LocalValueMap[V]; +} + +/// UpdateValueMap - Update the value map to include the new mapping for this +/// instruction, or insert an extra copy to get the result in a previous +/// determined register. +/// NOTE: This is only necessary because we might select a block that uses +/// a value before we select the block that defines the value. It might be +/// possible to fix this by selecting blocks in reverse postorder. +unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { + if (!isa<Instruction>(I)) { + LocalValueMap[I] = Reg; + return Reg; + } + + unsigned &AssignedReg = ValueMap[I]; + if (AssignedReg == 0) + AssignedReg = Reg; + else if (Reg != AssignedReg) { + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, + Reg, RegClass, RegClass); + } + return AssignedReg; +} + +unsigned FastISel::getRegForGEPIndex(Value *Idx) { + unsigned IdxN = getRegForValue(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return 0; + + // If the index is smaller or larger than intptr_t, truncate or extend it. + MVT PtrVT = TLI.getPointerTy(); + MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), + ISD::SIGN_EXTEND, IdxN); + else if (IdxVT.bitsGT(PtrVT)) + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), + ISD::TRUNCATE, IdxN); + return IdxN; +} + +/// SelectBinaryOp - Select and emit code for a binary operator instruction, +/// which has an opcode which directly corresponds to the given ISD opcode. +/// +bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { + MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && + (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(VT); + else + return false; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Check if the second operand is a constant and handle it appropriately. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, CI->getZExtValue()); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + // Check if the second operand is a constant float. 
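
The caching rules spelled out above are the backbone of FastISel's value reuse: results of Instructions live in a function-wide map because SSA guarantees their defs dominate their uses, while constants and other locally materialized values live in a per-block map that is discarded between blocks. A minimal standalone sketch of just that lookup order, with hypothetical map names standing in for the real ValueMap/LocalValueMap members:

#include <map>

struct Value {}; // stand-in for llvm::Value; illustration only

static std::map<const Value *, unsigned> InstructionRegs; // cross-block cache
static std::map<const Value *, unsigned> BlockLocalRegs;  // cleared per block

// Mirrors the shape of lookUpRegForValue: cross-block cache first, then the
// block-local one; 0 means "no register has been emitted for this value yet".
unsigned lookUpReg(const Value *V) {
  std::map<const Value *, unsigned>::const_iterator I = InstructionRegs.find(V);
  if (I != InstructionRegs.end())
    return I->second;
  I = BlockLocalRegs.find(V);
  return I != BlockLocalRegs.end() ? I->second : 0;
}
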
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, CF); + if (ResultReg != 0) { + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op1); + if (ResultReg == 0) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectGetElementPtr(User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + const Type *Ty = I->getOperand(0)->getType(); + MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); + // FIXME: This can be optimized by combining the add with a + // subsequent one. + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD.getTypeAllocSize(Ty); + unsigned IdxN = getRegForGEPIndex(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + if (ElementSize != 1) { + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } + } + + // We successfully emitted code for the given LLVM Instruction. 
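
To make the loop above concrete, take a GEP over a hypothetical type {i32, [4 x i32]} with indices (0, 1, Idx): the leading pointer index contributes nothing, the struct field is folded into a constant add of its StructLayout offset (4 here), and the array step becomes Idx scaled by the element size. A standalone sketch of that arithmetic with the sizes hard-coded instead of coming from TargetData:

#include <cassert>
#include <cstdint>

// Offset of: getelementptr {i32, [4 x i32]}* %p, i32 0, i32 1, i32 Idx
uint64_t gepOffset(int64_t Idx) {
  uint64_t Offs = 0;
  Offs += 0;       // pointer index 0: no displacement
  Offs += 4;       // struct field 1: constant StructLayout offset, folded as ADD
  Offs += 4 * Idx; // array element: Idx * ElementSize, emitted as MUL + ADD
  return Offs;
}

int main() {
  assert(gepOffset(3) == 16); // 0 + 4 + 3*4
}
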
+ UpdateValueMap(I, N); + return true; +} + +bool FastISel::SelectCall(User *I) { + Function *F = cast<CallInst>(I)->getCalledFunction(); + if (!F) return false; + + unsigned IID = F->getIntrinsicID(); + switch (IID) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), CodeGenOpt::None)) { + DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); + unsigned Line = SPI->getLine(); + unsigned Col = SPI->getColumn(); + unsigned Idx = MF.getOrCreateDebugLocID(CU.getGV(), Line, Col); + setCurDebugLoc(DebugLoc::get(Idx)); + } + return true; + } + case Intrinsic::dbg_region_start: { + DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I); + if (DIDescriptor::ValidDebugInfo(RSI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = + DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext())); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(ID); + } + return true; + } + case Intrinsic::dbg_region_end: { + DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I); + if (DIDescriptor::ValidDebugInfo(REI->getContext(), CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned ID = 0; + DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext())); + if (!Subprogram.isNull() && !Subprogram.describes(MF.getFunction())) { + // This is end of an inlined function. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics + // or "beginning of inlined scope" is not recoginized due to + // missing location info. In such cases, do ignore this region.end. + BuildMI(MBB, DL, II).addImm(ID); + } else { + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext())); + BuildMI(MBB, DL, II).addImm(ID); + } + } + return true; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI->getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::None)) + return true; + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = DL; + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return true; + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); + BuildMI(MBB, DL, II).addImm(LabelID); + } + } else { + // Record the source line. 
+ unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get(MF.getOrCreateDebugLocID( + CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); + } + } + + return true; + } + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + Value *Variable = DI->getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, CodeGenOpt::None) && + DW && DW->ShouldEmitDwarfDebug()) { + // Determine the address of the declared object. + Value *Address = DI->getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) break; + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + + // Determine the debug globalvariable. + GlobalValue *GV = cast<GlobalVariable>(Variable); + + // Build the DECLARE instruction. + const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); + MachineInstr *DeclareMI + = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); + DIVariable DV(cast<GlobalVariable>(GV)); + if (!DV.isNull()) { + // This is a local variable + DW->RecordVariableScope(DV, DeclareMI); + } + } + return true; + } + case Intrinsic::eh_exception: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { + default: break; + case TargetLowering::Expand: { + assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + return true; + } + } + break; + } + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MVT VT = TLI.getValueType(I->getType()); + switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { + default: break; + case TargetLowering::Expand: { + MVT VT = (IID == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (MBB->isLandingPad()) + AddCatchInfo(*cast<CallInst>(I), MMI, MBB); + else { +#ifndef NDEBUG + CatchInfoLost.insert(cast<CallInst>(I)); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
+ unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) MBB->addLiveIn(Reg); + } + + unsigned Reg = TLI.getExceptionSelectorRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Reg, RC, RC); + assert(InsertedCopy && "Can't copy address registers!"); + InsertedCopy = InsertedCopy; + UpdateValueMap(I, ResultReg); + } else { + unsigned ResultReg = + getRegForValue(Constant::getNullValue(I->getType())); + UpdateValueMap(I, ResultReg); + } + return true; + } + } + break; + } + } + return false; +} + +bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. Or as a special case, + // it may be i1 if we're doing a truncate because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(DstVT)) + if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the source operand is legal. Or as a special case, + // it may be i1 if we're doing zero-extension because that's + // easy and somewhat common. + if (!TLI.isTypeLegal(SrcVT)) + if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // If the operand is i1, arrange for the high bits in the register to be zero. + if (SrcVT == MVT::i1) { + SrcVT = TLI.getTypeToTransformTo(SrcVT); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + if (!InputReg) + return false; + } + // If the result is i1, truncate to the target's type for i1 first. + if (DstVT == MVT::i1) + DstVT = TLI.getTypeToTransformTo(DstVT); + + unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), + DstVT.getSimpleVT(), + Opcode, + InputReg); + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool FastISel::SelectBitCast(User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + UpdateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || + DstVT == MVT::Other || !DstVT.isSimple() || + !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (Op0 == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + // First, try to perform the bitcast by inserting a reg-reg copy. 
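
The i1 special cases above reduce to one bit of masking: when zero-extending from i1, the high bits of the source register may hold garbage, so SelectCast routes through FastEmitZExtFromI1 (defined at the end of this file), which is simply an AND with the immediate 1. A tiny standalone sketch of the same masking on a plain integer:

#include <cassert>
#include <cstdint>

// Keep only bit 0 of a register that conceptually holds an i1.
uint32_t zextFromI1(uint32_t RegContents) {
  return RegContents & 1; // high bits may hold garbage; clear them
}

int main() {
  assert(zextFromI1(0xFFFFFFFFu) == 1);
  assert(zextFromI1(0xFFFFFFFEu) == 0);
}
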
+ unsigned ResultReg = 0; + if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + ResultReg = createResultReg(DstClass); + + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + Op0, DstClass, SrcClass); + if (!InsertedCopy) + ResultReg = 0; + } + + // If the reg-reg copy failed, select a BIT_CONVERT opcode. + if (!ResultReg) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + ISD::BIT_CONVERT, Op0); + + if (!ResultReg) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool +FastISel::SelectInstruction(Instruction *I) { + return SelectOperator(I, I->getOpcode()); +} + +/// FastEmitBranch - Emit an unconditional branch to the given block, +/// unless it is the immediate (fall-through) successor, and update +/// the CFG. +void +FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { + MachineFunction::iterator NextMBB = + next(MachineFunction::iterator(MBB)); + + if (MBB->isLayoutSuccessor(MSucc)) { + // The unconditional fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>()); + } + MBB->addSuccessor(MSucc); +} + +bool +FastISel::SelectOperator(User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD; + return SelectBinaryOp(I, Opc); + } + case Instruction::Sub: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB; + return SelectBinaryOp(I, Opc); + } + case Instruction::Mul: { + ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL; + return SelectBinaryOp(I, Opc); + } + case Instruction::SDiv: + return SelectBinaryOp(I, ISD::SDIV); + case Instruction::UDiv: + return SelectBinaryOp(I, ISD::UDIV); + case Instruction::FDiv: + return SelectBinaryOp(I, ISD::FDIV); + case Instruction::SRem: + return SelectBinaryOp(I, ISD::SREM); + case Instruction::URem: + return SelectBinaryOp(I, ISD::UREM); + case Instruction::FRem: + return SelectBinaryOp(I, ISD::FREM); + case Instruction::Shl: + return SelectBinaryOp(I, ISD::SHL); + case Instruction::LShr: + return SelectBinaryOp(I, ISD::SRL); + case Instruction::AShr: + return SelectBinaryOp(I, ISD::SRA); + case Instruction::And: + return SelectBinaryOp(I, ISD::AND); + case Instruction::Or: + return SelectBinaryOp(I, ISD::OR); + case Instruction::Xor: + return SelectBinaryOp(I, ISD::XOR); + + case Instruction::GetElementPtr: + return SelectGetElementPtr(I); + + case Instruction::Br: { + BranchInst *BI = cast<BranchInst>(I); + + if (BI->isUnconditional()) { + BasicBlock *LLVMSucc = BI->getSuccessor(0); + MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; + FastEmitBranch(MSucc); + return true; + } + + // Conditional branches are not handed yet. + // Halt "fast" selection and bail. + return false; + } + + case Instruction::Unreachable: + // Nothing to emit. + return true; + + case Instruction::PHI: + // PHI nodes are already emitted. + return true; + + case Instruction::Alloca: + // FunctionLowering has the static-sized case covered. + if (StaticAllocaMap.count(cast<AllocaInst>(I))) + return true; + + // Dynamic-sized alloca is not handled yet. 
+ return false; + + case Instruction::Call: + return SelectCall(I); + + case Instruction::BitCast: + return SelectBitCast(I); + + case Instruction::FPToSI: + return SelectCast(I, ISD::FP_TO_SINT); + case Instruction::ZExt: + return SelectCast(I, ISD::ZERO_EXTEND); + case Instruction::SExt: + return SelectCast(I, ISD::SIGN_EXTEND); + case Instruction::Trunc: + return SelectCast(I, ISD::TRUNCATE); + case Instruction::SIToFP: + return SelectCast(I, ISD::SINT_TO_FP); + + case Instruction::IntToPtr: // Deliberate fall-through. + case Instruction::PtrToInt: { + MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + MVT DstVT = TLI.getValueType(I->getType()); + if (DstVT.bitsGT(SrcVT)) + return SelectCast(I, ISD::ZERO_EXTEND); + if (DstVT.bitsLT(SrcVT)) + return SelectCast(I, ISD::TRUNCATE); + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) return false; + UpdateValueMap(I, Reg); + return true; + } + + default: + // Unhandled instruction. Halt "fast" selection and bail. + return false; + } +} + +FastISel::FastISel(MachineFunction &mf, + MachineModuleInfo *mmi, + DwarfWriter *dw, + DenseMap<const Value *, unsigned> &vm, + DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, + DenseMap<const AllocaInst *, int> &am +#ifndef NDEBUG + , SmallSet<Instruction*, 8> &cil +#endif + ) + : MBB(0), + ValueMap(vm), + MBBMap(bm), + StaticAllocaMap(am), +#ifndef NDEBUG + CatchInfoLost(cil), +#endif + MF(mf), + MMI(mmi), + DW(dw), + MRI(MF.getRegInfo()), + MFI(*MF.getFrameInfo()), + MCP(*MF.getConstantPool()), + TM(MF.getTarget()), + TD(*TM.getTargetData()), + TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { +} + +FastISel::~FastISel() {} + +unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType) { + return 0; +} + +unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/) { + return 0; +} + +unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + unsigned /*Op0*/) { + return 0; +} + +unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, uint64_t /*Imm*/) { + return 0; +} + +unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, ConstantFP * /*FPImm*/) { + return 0; +} + +unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + uint64_t /*Imm*/) { + return 0; +} + +unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, unsigned /*Op0*/, + ConstantFP * /*FPImm*/) { + return 0; +} + +unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, + ISD::NodeType, + unsigned /*Op0*/, unsigned /*Op1*/, + uint64_t /*Imm*/) { + return 0; +} + +/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries +/// to emit an instruction with an immediate operand using FastEmit_ri. +/// If that fails, it materializes the immediate into a register and try +/// FastEmit_rr instead. +unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, + unsigned Op0, uint64_t Imm, + MVT::SimpleValueType ImmType) { + // First check if immediate type is legal. If not, we can't use the ri form. 
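
The FastEmit_* stubs above are the target hook surface: each base implementation returns 0, which callers such as SelectBinaryOp treat as "no single machine instruction covers this, fall back to the SelectionDAG path", and a target's generated fast-isel code overrides the forms it can handle. A minimal sketch of that convention, using illustrative names rather than the real TableGen-generated overrides:

// Illustrative names only; the real hooks are the FastEmit_* virtuals above.
struct FastEmitterBase {
  virtual ~FastEmitterBase() {}
  // 0 means "cannot select this form"; overrides return a virtual register.
  virtual unsigned emit_rr(unsigned /*Op0*/, unsigned /*Op1*/) { return 0; }
};

struct ToyTargetEmitter : public FastEmitterBase {
  unsigned NextVReg;
  ToyTargetEmitter() : NextVReg(1) {}
  virtual unsigned emit_rr(unsigned, unsigned) {
    return NextVReg++; // pretend a single instruction handled the operation
  }
};

// Caller-side convention, as in SelectBinaryOp: a zero result halts "fast"
// selection for this instruction.
bool trySelect(FastEmitterBase &E, unsigned A, unsigned B, unsigned &Out) {
  Out = E.emit_rr(A, B);
  return Out != 0;
}
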
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); + if (ResultReg != 0) + return ResultReg; + unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + if (MaterialReg == 0) + return 0; + return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); +} + +/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries +/// to emit an instruction with a floating-point immediate operand using +/// FastEmit_rf. If that fails, it materializes the immediate into a register +/// and try FastEmit_rr instead. +unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, + unsigned Op0, ConstantFP *FPImm, + MVT::SimpleValueType ImmType) { + // First check if immediate type is legal. If not, we can't use the rf form. + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); + if (ResultReg != 0) + return ResultReg; + + // Materialize the constant in a register. + unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); + if (MaterialReg == 0) { + // If the target doesn't have a way to directly enter a floating-point + // value into a register, use an alternate approach. + // TODO: The current approach only supports floating-point constants + // that can be constructed by conversion from integer values. This should + // be replaced by code that creates a load from a constant-pool entry, + // which will require some target-specific work. + const APFloat &Flt = FPImm->getValueAPF(); + MVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (!isExact) + return 0; + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::Constant, IntVal.getZExtValue()); + if (IntegerReg == 0) + return 0; + MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, + ISD::SINT_TO_FP, IntegerReg); + if (MaterialReg == 0) + return 0; + } + return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(MBB, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + else { + BuildMI(MBB, DL, II).addReg(Op0); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned 
FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + else { + BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(MBB, DL, II).addImm(Imm); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, + unsigned Op0, uint32_t Idx) { + const TargetRegisterClass* RC = MRI.getRegClass(Op0); + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + + if (II.getNumDefs() >= 1) + BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + else { + BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, + II.ImplicitDefs[0], RC, RC); + if (!InsertedCopy) + ResultReg = 0; + } + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { + return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 000000000000..2cd67e61907f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,3091 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <map>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch into a single brcc.
+///
+namespace {
+class VISIBILITY_HIDDEN SelectionDAGLegalize {
+ TargetLowering &TLI;
+ SelectionDAG &DAG;
+ CodeGenOpt::Level OptLevel;
+
+ // Libcall insertion helpers.
+
+ /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+ /// legalized. We use this to ensure that calls are properly serialized
+ /// against each other, including inserted libcalls.
+ SDValue LastCALLSEQ_END;
+
+ /// IsLegalizingCall - This member is used *only* for purposes of providing
+ /// helpful assertions that a libcall isn't created while another call is
+ /// being legalized (which could lead to non-serialized call sequences).
+ bool IsLegalizingCall;
+
+ enum LegalizeAction {
+ Legal, // The target natively supports this operation.
+ Promote, // This operation should be executed in a larger type.
+ Expand // Try to expand this to other ops, otherwise use a libcall.
+ };
+
+ /// ValueTypeActions - This is a bitvector that contains two bits for each
+ /// value type, where the two bits correspond to the LegalizeAction enum.
+ /// This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// LegalizedNodes - For nodes that are of legal width, and that have more
+ /// than one use, this map indicates what regularized operand to use. This
+ /// allows us to avoid legalizing the same thing more than once.
+ DenseMap<SDValue, SDValue> LegalizedNodes;
+
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+ /// getTypeAction - Return how we should legalize values of this type, either
+ /// it is already legal or we need to expand it into multiple registers of
+ /// smaller integer type, or we need to promote it to a larger type.
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+ }
+
+ /// isTypeLegal - Return true if this type is legal on this target.
+ ///
+ bool isTypeLegal(MVT VT) const {
+ return getTypeAction(VT) == Legal;
+ }
+
+ void LegalizeDAG();
+
+private:
+ /// LegalizeOp - We know that the specified value has a legal type.
+ /// Recursively ensure that the operands have legal types, then return the
+ /// result.
+ SDValue LegalizeOp(SDValue O);
+
+ /// PerformInsertVectorEltInMemory - Some target cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx, DebugLoc dl);
+
+ /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const;
+
+ bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+ SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+ void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ DebugLoc dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+
+ SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ SDValue ExpandDBG_STOPPOINT(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+ DebugLoc dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+ DebugLoc dl);
+
+ SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+
+ void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+ SDValue N1, SDValue N2,
+ SmallVectorImpl<int> &Mask) const {
+ MVT EltVT = NVT.getVectorElementType();
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+}
+
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
+ CodeGenOpt::Level ol)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ assert(MVT::LAST_VALUETYPE <= 32 &&
+ "Too many value types for ValueTypeActions to hold!");
+}
+
+void SelectionDAGLegalize::LegalizeDAG() {
+ LastCALLSEQ_END = DAG.getEntryNode();
+ IsLegalizingCall = false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). 
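
The mask rewrite in ShuffleWithNarrowerEltType is pure index arithmetic: each original index Idx expands into NumEltsGrowth consecutive narrow indices Idx*NumEltsGrowth+j, and a negative (undef) index stays -1 for every widened slot. A standalone sketch that reproduces the <0, 1, 0, 1> -> <0, 1, 2, 3, 0, 1, 2, 3> example from the comment:

#include <cstdio>
#include <vector>

// Widen a shuffle mask when each wide element is split into Growth narrow ones.
std::vector<int> widenMask(const std::vector<int> &Mask, unsigned Growth) {
  std::vector<int> NewMask;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    for (unsigned j = 0; j != Growth; ++j)
      NewMask.push_back(Mask[i] < 0 ? -1 : Mask[i] * (int)Growth + (int)j);
  return NewMask;
}

int main() {
  const int M[] = {0, 1, 0, 1};                        // v4i32 mask
  std::vector<int> Wide = widenMask(std::vector<int>(M, M + 4), 2);
  for (unsigned i = 0; i != Wide.size(); ++i)
    std::printf("%d ", Wide[i]);                       // prints: 0 1 2 3 0 1 2 3
  std::printf("\n");
}
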
Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node) { + if (Node->getOpcode() == ISD::CALLSEQ_END) + return Node; + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; + } + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheChain) + if (SDNode *Result = FindCallEndFromCallStart(User)) + return Result; + } + return 0; +} + +/// FindCallStartFromCallEnd - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_START node that initiates the call sequence. +static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + assert(Node && "Didn't find callseq_start for a call??"); + if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; + + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); +} + +/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to +/// see if any uses can reach Dest. If no dest operands can get to dest, +/// legalize them, legalize ourself, and return false, otherwise, return true. +/// +/// Keep track of the nodes we fine that actually do lead to Dest in +/// NodesLeadingTo. This avoids retraversing them exponential number of times. +/// +bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet<SDNode*, 32> &NodesLeadingTo) { + if (N == Dest) return true; // N certainly leads to Dest :) + + // If we've already processed this node and it does lead to Dest, there is no + // need to reprocess it. + if (NodesLeadingTo.count(N)) return true; + + // If the first result of this node has been already legalized, then it cannot + // reach N. + if (LegalizedNodes.count(SDValue(N, 0))) return false; + + // Okay, this node has not already been legalized. 
Check and legalize all + // operands. If none lead to Dest, then we can legalize this node. + bool OperandsLeadToDest = false; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OperandsLeadToDest |= // If an operand leads to Dest, so do we. + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + + if (OperandsLeadToDest) { + NodesLeadingTo.insert(N); + return true; + } + + // Okay, this node looks safe, legalize it and return false. + LegalizeOp(SDValue(N, 0)); + return false; +} + +/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, + SelectionDAG &DAG, const TargetLowering &TLI) { + bool Extend = false; + DebugLoc dl = CFP->getDebugLoc(); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). + MVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + MVT OrigVT = VT; + MVT SVT = VT; + while (SVT != MVT::f32) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1); + if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + const Type *SType = SVT.getTypeForMVT(); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, + OrigVT, DAG.getEntryNode(), + CPIdx, PseudoSourceValue::getConstantPool(), + 0, VT, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, Alignment); +} + +/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + MVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + int SVOffset = ST->getSrcValueOffset(); + DebugLoc dl = ST->getDebugLoc(); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + MVT intVT = MVT::getIntegerVT(VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! 
+ SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), + SVOffset, ST->isVolatile(), Alignment); + } else { + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + MVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, NULL, 0, StoredVT); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + NULL, 0, MemVT); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getSrcValue(), SVOffset + Offset, + MemVT, ST->isVolatile(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + } + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + MVT NewStoredVT = + (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. 
+ SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, + ST->getSrcValue(), SVOffset, NewStoredVT, + ST->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, + ST->getSrcValue(), SVOffset + IncrementSize, + NewStoredVT, ST->isVolatile(), Alignment); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); +} + +/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { + int SVOffset = LD->getSrcValueOffset(); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + MVT VT = LD->getValueType(0); + MVT LoadedVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + if (VT.isFloatingPoint() || VT.isVector()) { + MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset, LD->isVolatile(), + LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + if (VT.isFloatingPoint() && LoadedVT != VT) + Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } else { + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = TLI.getRegisterType(intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset + Offset, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getSrcValue(), SVOffset + Offset, + MemVT, LD->isVolatile(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. 
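
For the integer path just emitted, the split is plain arithmetic: the low half of the value is stored at the original address, the value shifted right by half its width is stored at the address plus IncrementSize, and the two halves are swapped on big-endian targets. A standalone little-endian sketch of a 32-bit store broken into two 16-bit stores:

#include <cassert>
#include <cstdint>
#include <cstring>

// Sketch of the Lo/Hi split above for a 32-bit value and a 2-byte-aligned
// pointer (little-endian order; a big-endian target would swap the halves).
void storeUnaligned32(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = (uint16_t)Val;         // truncating store at Ptr
  uint16_t Hi = (uint16_t)(Val >> 16); // SRL by NumBits, store at Ptr + 2
  std::memcpy(Ptr, &Lo, 2);
  std::memcpy(Ptr + 2, &Hi, 2);
}

int main() {
  uint8_t Buf[6] = {0};
  storeUnaligned32(Buf + 1, 0xAABBCCDDu); // deliberately misaligned address
  uint16_t Lo, Hi;
  std::memcpy(&Lo, Buf + 1, 2);
  std::memcpy(&Hi, Buf + 3, 2);
  assert(((uint32_t(Hi) << 16) | Lo) == 0xAABBCCDDu);
}
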
+ // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + NULL, 0, MemVT)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + NULL, 0, LoadedVT); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); + } + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + MVT NewLoadedVT; + NewLoadedVT = MVT::getIntegerVT(NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. + if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (TLI.isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + MinAlign(Alignment, IncrementSize)); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + MinAlign(Alignment, IncrementSize)); + } + + // aggregate the two parts + SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + SDValue Ops[] = { Result, TF }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// PerformInsertVectorEltInMemory - Some target cannot handle a variable +/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// is necessary to spill the vector being inserted into to memory, perform +/// the insert there, and then read the result back. +SDValue SelectionDAGLegalize:: +PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + DebugLoc dl) { + SDValue Tmp1 = Vec; + SDValue Tmp2 = Val; + SDValue Tmp3 = Idx; + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. 
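
One detail worth calling out in the recombination above is that the low half is always loaded with ISD::ZEXTLOAD: if the low half were sign-extended, the OR would smear its sign bit across the bits the high half is supposed to supply. A small standalone check of that point:

#include <cassert>
#include <cstdint>

// Result = (Hi << NumBits) | Lo, as in ExpandUnalignedLoad, with NumBits = 16.
uint32_t combine(uint16_t Hi, uint32_t LoExtended) {
  return (uint32_t(Hi) << 16) | LoExtended;
}

int main() {
  uint16_t Hi = 0xAABB, Lo = 0xCCDD;
  assert(combine(Hi, (uint32_t)Lo) == 0xAABBCCDDu);                    // zero-extended low half: correct
  assert(combine(Hi, (uint32_t)(int32_t)(int16_t)Lo) != 0xAABBCCDDu);  // sign-extended low half: corrupted
}
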
+ MVT VT = Tmp1.getValueType(); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Tmp3.getValueType(); + MVT PtrVT = TLI.getPointerTy(); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + // Load the updated vector. + return DAG.getLoad(VT, dl, Ch, StackPtr, + PseudoSourceValue::getFixedStack(SPFI), 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + MVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +/// LegalizeOp - We know that the specified value has a legal type, and +/// that its operands are legal. Now ensure that the operation itself +/// is legal, recursively ensuring that the operands' operations remain +/// legal. +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(getTypeAction(Node->getValueType(i)) == Legal && + "Unexpected illegal type!"); + + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + assert((isTypeLegal(Node->getOperand(i).getValueType()) || + Node->getOperand(i).getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
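
PerformInsertVectorEltInMemory above is the memory fallback in miniature: when the insertion index is not a constant, the whole vector is spilled to a stack slot, the one element at base + Idx * EltSize is overwritten, and the vector is loaded back. A standalone sketch over a hypothetical 4 x float value (the index is masked here only to keep the sketch in bounds; the real code uses a truncating store for the scalar):

#include <cassert>
#include <cstring>

struct V4 { float Lane[4]; }; // stand-in for a v4f32 value

V4 insertElt(V4 Vec, float Val, unsigned Idx) {
  float Slot[4];
  std::memcpy(Slot, Vec.Lane, sizeof(Slot)); // store the vector to a stack slot
  Slot[Idx & 3] = Val;                       // overwrite one lane at Base + Idx*EltSize
  V4 Out;
  std::memcpy(Out.Lane, Slot, sizeof(Slot)); // load the updated vector back
  return Out;
}

int main() {
  V4 V = { {1.0f, 2.0f, 3.0f, 4.0f} };
  V4 R = insertElt(V, 9.0f, 2);
  assert(R.Lane[2] == 9.0f && R.Lane[0] == 1.0f);
}
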
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; + bool isCustom = false; + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::VAARG: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. + SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::CALL: + // FIXME: Legalization for calls requires custom-lowering the call before + // legalizing the operands! (I haven't looked into precisely why.) + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FORMAL_ARGUMENTS: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... 
changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SmallVector<SDValue, 8> Ops, ResultVals; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + switch (Node->getOpcode()) { + default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + case ISD::RET: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + break; + } + + Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), + Ops.size()); + switch (Action) { + case TargetLowering::Legal: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; + case TargetLowering::Custom: + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) { + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + if (e == 1) + ResultVals.push_back(Tmp1); + else + ResultVals.push_back(Tmp1.getValue(i)); + } + break; + } + + // FALL THROUGH + case TargetLowering::Expand: + ExpandNode(Result.getNode(), ResultVals); + break; + case TargetLowering::Promote: + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to legalize this operator!"); + abort(); + case ISD::CALL: + // The only option for this is to custom lower it. + Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); + assert(Tmp3.getNode() && "Target didn't custom lower this node!"); + // A call within a calling sequence must be legalized to something + // other than the normal CALLSEQ_END. Violating this gets Legalize + // into an infinite loop. + assert ((!IsLegalizingCall || + Node->getOpcode() != ISD::CALL || + Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && + "Nested CALLSEQ_START..CALLSEQ_END not supported."); + + // The number of incoming and outgoing values should match; unless the final + // outgoing value is a flag. 
+ assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() || + (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 && + Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) == + MVT::Flag)) && + "Lowering call/formal_arguments produced unexpected # results!"); + + // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to + // remember that we legalized all of them, so it doesn't get relegalized. + for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) { + if (Tmp3.getNode()->getValueType(i) == MVT::Flag) + continue; + Tmp1 = LegalizeOp(Tmp3.getValue(i)); + if (Op.getResNo() == i) + Tmp2 = Tmp1; + AddLegalizedOperand(SDValue(Node, i), Tmp1); + } + return Tmp2; + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.getNode()); + break; + } + break; + case ISD::CALLSEQ_START: { + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet<SDNode*, 32> NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, + NodesLeadingTo); + } + + // Now that we legalized all of the inputs (which may have inserted + // libcalls) create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call, to ensure that this call start after the last + // call ended. + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. During this + // process, no libcalls can/will be inserted, guaranteeing that no calls + // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + // Note that we are selecting this call! + LastCALLSEQ_END = SDValue(CallEnd, 0); + IsLegalizingCall = true; + + // Legalize the call, starting from the CALLSEQ_END. + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + return Result; + } + case ISD::CALLSEQ_END: + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. 
+ if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); + return Result.getValue(Op.getResNo()); + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Node); + Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + MVT VT = Node->getValueType(0); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = Result.getValue(0); + Tmp4 = Result.getValue(1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); + } + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Tmp3, DAG); + if (Tmp1.getNode()) { + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + } + break; + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); + break; + } + } + // Since loads produce two values, make sure to remember that we + // legalized both of them. 
+ AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? Tmp4 : Tmp3; + } else { + MVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + MVT NVT = MVT::getIntegerVT(NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, + NVT, isVolatile, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(SrcVT.isExtended() && !SrcVT.isVector() && + "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. 
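The TokenFactor, shift, and OR that combine the two partial loads continue immediately below. As a standalone check (not part of this change) of the little-endian arithmetic in the EXTLOAD:i24 example above, with arbitrary example bytes in memory:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned char mem[3] = {0x78, 0x56, 0x34};   // an i24 value 0x345678 in memory
  std::uint32_t lo = mem[0] | (mem[1] << 8);   // ZEXTLOAD:i16  (RoundWidth = 16)
  std::uint32_t hi = mem[2];                   // EXTLOAD@+2:i8 (ExtraWidth = 8)
  std::uint32_t val = lo | (hi << 16);         // shl Hi, RoundWidth; or Lo, Hi
  std::printf("0x%06x\n", (unsigned)val);      // prints: 0x345678
  return 0;
}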
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset, RoundVT, isVolatile, + Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, + Node->getValueType(0), Tmp1, Tmp2, + LD->getSrcValue(), SVOffset + IncrementSize, + ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, + TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); + } + } + } + break; + case TargetLowering::Expand: + // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND + if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); + Result = DAG.getNode(ISD::FP_EXTEND, dl, + Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); + break; + } + assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. 
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getSrcValue(), + LD->getSrcValueOffset(), SrcVT, + LD->isVolatile(), LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; + } + } + case ISD::STORE: { + StoreSDNode *ST = cast<StoreSDNode>(Node); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + + if (!ST->isTruncatingStore()) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. + const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, MinAlign(Alignment, 4U)); + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + break; + } + } + } + + { + Tmp3 = LegalizeOp(ST->getValue()); + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + MVT VT = Tmp3.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, + TLI); + } + break; + case TargetLowering::Custom: + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; + break; + case TargetLowering::Promote: + assert(VT.isVector() && "Unknown legal promote case!"); + Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getSrcValue(), SVOffset, isVolatile, + Alignment); + break; + } + break; + } + } else { + Tmp3 = LegalizeOp(ST->getValue()); + + MVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, NVT, isVolatile, Alignment); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(StVT.isExtended() && !StVT.isVector() && + "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + MVT RoundVT = MVT::getIntegerVT(RoundWidth); + MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, + isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), + SVOffset, RoundVT, isVolatile, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset + IncrementSize, ExtraVT, isVolatile, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. 
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || + Tmp2 != ST->getBasePtr()) + Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, + ST->getOffset()); + + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses()) { + unsigned ABIAlignment = TLI.getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (ST->getAlignment() < ABIAlignment) + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, + TLI); + } + break; + case TargetLowering::Custom: + Result = TLI.LowerOperation(Result, DAG); + break; + case Expand: + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); + Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + break; + } + } + } + break; + } + } + assert(Result.getValueType() == Op.getValueType() && + "Bad legalization!"); + + // Make sure that the generated code is itself legal. + if (Result != Op) + Result = LegalizeOp(Result); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + // Store the value to a temporary stack slot, then LOAD the returned part. + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + assert((Tmp2.getValueType() == MVT::f32 || + Tmp2.getValueType() == MVT::f64) && + "Ugly special-cased code!"); + // Get the sign bit of the RHS. + SDValue SignBit; + MVT IVT = Tmp2.getValueType() == MVT::f64 ? 
MVT::i64 : MVT::i32; + if (isTypeLegal(IVT)) { + SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + } else { + assert(isTypeLegal(TLI.getPointerTy()) && + (TLI.getPointerTy() == MVT::i32 || + TLI.getPointerTy() == MVT::i64) && + "Legal type for load?!"); + SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); + SDValue StorePtr = StackPtr, LoadPtr = StackPtr; + SDValue Ch = + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) + LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), + LoadPtr, DAG.getIntPtrConstant(4)); + SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), + Ch, LoadPtr, NULL, 0, MVT::i32); + } + SignBit = + DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), + SignBit, DAG.getConstant(0, SignBit.getValueType()), + ISD::SETLT); + // Get the absolute value of the result. + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); + // Select between the nabs and abs value based on the sign bit of + // the input. + return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit, + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); +} + +SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { + DebugLoc dl = Node->getDebugLoc(); + DwarfWriter *DW = DAG.getDwarfWriter(); + bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC, + MVT::Other); + bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); + + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node); + GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit()); + if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) { + DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit())); + + unsigned Line = DSP->getLine(); + unsigned Col = DSP->getColumn(); + + if (OptLevel == CodeGenOpt::None) { + // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it + // won't hurt anything. + if (useDEBUG_LOC) { + return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), + DAG.getConstant(Line, MVT::i32), + DAG.getConstant(Col, MVT::i32), + DAG.getSrcValue(CU.getGV())); + } else { + unsigned ID = DW->RecordSourceLine(Line, Col, CU); + return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); + } + } + } + return Node->getOperand(0); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + DebugLoc dl = Node->getDebugLoc(); + MVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. 
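The CALLSEQ_START/CALLSEQ_END bracketing of the dynamic stack allocation continues immediately below. As an aside, the ExpandFCOPYSIGN routine above reduces fcopysign to a sign-bit test on the second operand plus a select of |x| or -|x|; a standalone sketch of that reduction (illustrative only, example values assumed, and the sign test written as an unsigned shift rather than the signed compare against zero that the DAG code performs):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

double copysign_expanded(double x, double y) {
  std::uint64_t bits;
  std::memcpy(&bits, &y, sizeof bits);   // BIT_CONVERT f64 -> i64 (or load via a slot)
  bool neg = (bits >> 63) != 0;          // the sign bit of the second operand
  double absval = std::fabs(x);          // FABS of the first operand
  return neg ? -absval : absval;         // SELECT(sign, FNEG(abs), abs)
}

int main() {
  std::printf("%g %g\n", copysign_expanded(3.5, -0.0), copysign_expanded(-2.0, 1.0));
  // prints: -3.5 2
  return 0;
}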
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = + TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + if (Align > StackAlign) + SP = DAG.getNode(ISD::AND, dl, VT, SP, + DAG.getConstant(-(uint64_t)Align, VT)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and +/// condition code CC on the current target. This routine assumes LHS and rHS +/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with +/// illegal condition code into AND / OR of multiple SETCC values. +void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, + SDValue &LHS, SDValue &RHS, + SDValue &CC, + DebugLoc dl) { + MVT OpVT = LHS.getValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + switch (TLI.getCondCodeAction(CCCode, OpVT)) { + default: assert(0 && "Unknown condition code action!"); + case TargetLowering::Legal: + // Nothing to do. + break; + case TargetLowering::Expand: { + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + unsigned Opc = 0; + switch (CCCode) { + default: assert(0 && "Don't know how to expand this condition!"); abort(); + case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break; + case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break; + // FIXME: Implement more expansions. + } + + SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + RHS = SDValue(); + CC = SDValue(); + break; + } + } +} + +/// EmitStackConvert - Emit a store/load combination to the stack. This stores +/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does +/// a load from the stack slot to DestVT, extending it if needed. +/// The resultant code need not be legal. +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, + MVT SlotVT, + MVT DestVT, + DebugLoc dl) { + // Create the stack frame object. + unsigned SrcAlign = + TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). 
+ getTypeForMVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ unsigned DestAlign =
+ TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // larger than SlotVT.
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, SlotVT, false, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+ SV, 0, false, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ DebugLoc dl = Node->getDebugLoc();
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+ StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0,
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+ PseudoSourceValue::getFixedStack(SPFI), 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue SplatValue = Node->getOperand(0);
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ MVT OpVT = SplatValue.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+
+ // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
+ // and use a bitmask instead of a list of elements.
+ // FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
+ std::map<SDValue, std::vector<unsigned> > Values;
+ Values[SplatValue].push_back(0);
+ bool isConstant = true;
+ if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
+ SplatValue.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ Values[V].push_back(i);
+ if (V.getOpcode() != ISD::UNDEF)
+ isOnlyLowElement = false;
+ if (SplatValue != V)
+ SplatValue = SDValue(0, 0);
+
+ // If this isn't a constant element or an undef, we can't use a constant
+ // pool load.
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
+ V.getOpcode() != ISD::UNDEF)
+ isConstant = false;
+ }
+
+ if (isOnlyLowElement) {
+ // If the low element is an undef too, then this whole thing is an undef.
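The BUILD_VECTOR expansion continues immediately below. As an aside, the EmitStackConvert helper earlier in this hunk converts between types by storing a value with one type and reloading it with another; a standalone illustration (not part of this change), using an assumed f32/i32 pair:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float f = 1.0f;
  std::uint32_t bits;
  unsigned char slot[sizeof(float)];      // the "stack temporary"
  std::memcpy(slot, &f, sizeof f);        // store with the source type
  std::memcpy(&bits, slot, sizeof bits);  // reload with the destination type
  std::printf("0x%08x\n", (unsigned)bits); // prints: 0x3f800000
  return 0;
}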
+ if (Node->getOperand(0).getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + // Otherwise, turn this into a scalar_to_vector node. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + } + + // If all elements are constants, create a load from the constant pool. + if (isConstant) { + std::vector<Constant*> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + const Type *OpNTy = OpVT.getTypeForMVT(); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + } + + if (SplatValue.getNode()) { // Splat of one value? + // Build the shuffle constant vector: <0, 0, 0, 0> + SmallVector<int, 8> ZeroVec(NumElems, 0); + + // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. + if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue LowValVec = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT), + &ZeroVec[0]); + } + } + + // If there are only two unique elements, we may be able to turn this into a + // vector shuffle. + if (Values.size() == 2) { + // Get the two values in deterministic order. + SDValue Val1 = Node->getOperand(1); + SDValue Val2; + std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin(); + if (MI->first != Val1) + Val2 = MI->first; + else + Val2 = (++MI)->first; + + // If Val1 is an undef, make sure it ends up as Val2, to ensure that our + // vector shuffle has the undef vector on the RHS. + if (Val1.getOpcode() == ISD::UNDEF) + std::swap(Val1, Val2); + + // Build the shuffle constant vector: e.g. <0, 4, 0, 4> + SmallVector<int, 8> ShuffleMask(NumElems, -1); + + // Set elements of the shuffle mask for Val1. + std::vector<unsigned> &Val1Elts = Values[Val1]; + for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) + ShuffleMask[Val1Elts[i]] = 0; + + // Set elements of the shuffle mask for Val2. + std::vector<unsigned> &Val2Elts = Values[Val2]; + for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) + if (Val2.getOpcode() != ISD::UNDEF) + ShuffleMask[Val2Elts[i]] = NumElems; + + // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. + if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && + TLI.isShuffleMaskLegal(ShuffleMask, VT)) { + Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); + Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); + return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]); + } + } + + // Otherwise, we can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. 
+ SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = OpVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, SV, Offset)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); +} + +// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + MVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForMVT(); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForMVT(); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + CallingConv::C, false, Callee, Args, DAG, + Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT()) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// INT_TO_FP operation of the specified operand when the target requests that +/// we expand it. At this point, we know that the result and operand types are +/// legal for the target. +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, + SDValue Op0, + MVT DestVT, + DebugLoc dl) { + if (Op0.getValueType() == MVT::i32) { + // simple 32-bit [signed|unsigned] integer to float/double expansion + + // Get the stack frame index of a 8 byte buffer. + SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); + + // word offset constant for Hi/Lo address computation + SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + // set up Hi and Lo (into buffer) address based on endian + SDValue Hi = StackSlot; + SDValue Lo = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), StackSlot, WordOff); + if (TLI.isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDValue Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, + Op0Mapped, Lo, NULL, 0); + // initial hi portion of constructed double + SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + // store the hi of the constructed double - biased exponent + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + // load the constructed double + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + // FP constant to bias correct the final result + SDValue Bias = DAG.getConstantFP(isSigned ? 
+ BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result; + // handle final rounding + if (DestVT == MVT::f64) { + // do nothing + Result = Sub; + } else if (DestVT.bitsLT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, + DAG.getIntPtrConstant(0)); + } else if (DestVT.bitsGT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); + } + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. + uint64_t FF; + switch (Op0.getValueType().getSimpleVT()) { + default: assert(0 && "Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (TLI.isLittleEndian()) FF <<= 32; + Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + + SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment); + else { + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + MVT::f32, false, Alignment)); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + MVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + MVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. 
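The scan for a wider legal *INT_TO_FP operation continues immediately below. As an aside, the 32-bit path of ExpandLegalINT_TO_FP above builds a double whose high word is 0x43300000 and whose low word is the integer, then subtracts the bias BitsToDouble(0x4330000000000000ULL); a standalone check of that arithmetic (illustrative only, the packing done with memcpy instead of two word stores, example inputs assumed):

#include <cstdint>
#include <cstdio>
#include <cstring>

double u32_to_f64(std::uint32_t x) {
  // High word 0x43300000, low word x: the double's value is exactly 2^52 + x.
  std::uint64_t packed = 0x4330000000000000ULL | x;
  double d, bias;
  std::uint64_t biasbits = 0x4330000000000000ULL;  // same constant as the code
  std::memcpy(&d, &packed, sizeof d);
  std::memcpy(&bias, &biasbits, sizeof bias);
  return d - bias;                                 // FSUB removes the 2^52 bias
}

int main() {
  std::printf("%.0f %.0f\n", u32_to_f64(0), u32_to_f64(0xFFFFFFFFu));
  // prints: 0 4294967295
  return 0;
}

The signed variant in the code above flips the sign bit of the input first and subtracts BitsToDouble(0x4330000080000000ULL) instead.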
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, + MVT DestVT, + bool isSigned, + DebugLoc dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + MVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. 
+/// +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { + MVT VT = Op.getValueType(); + MVT SHVT = TLI.getShiftAmountTy(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT()) { + default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// ExpandBitCount - Expand the specified bitcount instruction into operations. +/// +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + DebugLoc dl) { + switch (Opc) { + default: assert(0 && "Cannot expand this yet!"); + case ISD::CTPOP: { + static const uint64_t mask[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) + unsigned EltSize = VT.isVector() ? 
+ VT.getVectorElementType().getSizeInBits() : len; + SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), + Tmp2)); + } + return Op; + } + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + MVT VT = Op.getValueType(); + MVT ShVT = TLI.getShiftAmountTy(); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // see also http://www.hackersdelight.org/HDcode/ntz.cc + MVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::DECLARE: + case ISD::DBG_LABEL: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::MEMBARRIER: + case ISD::VAEND: + Results.push_back(Node->getOperand(0)); + break; + case ISD::DBG_STOPPOINT: + Results.push_back(ExpandDBG_STOPPOINT(Node)); + break; + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + MVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else if (VT.isFloatingPoint()) + Results.push_back(DAG.getConstantFP(0, VT)); + else + assert(0 && "Unknown value type!"); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + false, false, false, false, CallingConv::C, false, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + 
Results.push_back(CallResult.second); + break; + } + case ISD::FP_ROUND: + case ISD::BIT_CONVERT: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - + ExtraVT.getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targetting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. + MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + MVT VT = Node->getOperand(0).getValueType(); + MVT NVT = Node->getValueType(0); + const uint64_t zero[] = {0, 0}; + APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, NVT)); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: { + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + MVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(TLI.getTargetData()-> + getTypeAllocSize(VT.getTypeForMVT()), + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + // Load the actual argument out of the pointer VAList + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::VACOPY: { + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. 
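In C terms, this default VACOPY lowering treats each va_list operand as a pointer to a single pointer-sized slot and copies that slot, roughly (illustrative sketch only, not taken from the commit):

    // Generic va_copy when va_list is one pointer-sized object:
    // load the source slot, store it into the destination slot.
    void generic_va_copy(void **dest, void **src) {
      *dest = *src;   // one pointer-width load feeding one store
    }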
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), VS, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + // Use extract/insert/build vector for now. We might try to be + // more clever later. + SmallVector<SDValue, 8> Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + MVT VVT = SubOp.getNode()->getValueType(0); + MVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j=0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j))); + } + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + MVT VT = Node->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(0), + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(1), + DAG.getIntPtrConstant(Idx - NumElems))); + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + MVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. 
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). + MVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128)); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, 
RTLIB::POW_F64,
+                                      RTLIB::POW_F80, RTLIB::POW_PPCF128));
+    break;
+  case ISD::FDIV:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+                                      RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+    break;
+  case ISD::FREM:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+                                      RTLIB::REM_F80, RTLIB::REM_PPCF128));
+    break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+    // Check to see if this FP immediate is already legal.
+    bool isLegal = false;
+    for (TargetLowering::legal_fpimm_iterator I = TLI.legal_fpimm_begin(),
+           E = TLI.legal_fpimm_end(); I != E; ++I) {
+      if (CFP->isExactlyValue(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+    // If this is a legal constant, turn it into a TargetConstantFP node.
+    if (isLegal)
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    MVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    MVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X - (X/Y)*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ?
ISD::SDIVREM : ISD::UDIVREM; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + else if (isSigned) + Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128); + else + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128); + Results.push_back(Tmp1); + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + MVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + MVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? 
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::BUILD_PAIR: { + MVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy())); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + MVT PTy = TLI.getPointerTy(); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); + Index= DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0, MemVT); + Addr = LD; + if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. 
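In source form, the fallback performed by the next few lines is simply a select of the constants 1 and 0 driven by the same comparison (illustrative sketch, not part of the commit):

    // setcc(a, b, lt)  ==>  select_cc(a, b, 1, 0, lt)
    int setcc_as_select(double a, double b) {
      return (a < b) ? 1 : 0;   // SELECT_CC picks between the two constants
    }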
+ MVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); + break; + } +} +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT OVT = Node->getValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP) { + OVT = Node->getOperand(0).getValueType(); + } + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. 
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + ISD::SETEQ); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); + } + Results.push_back(Tmp1); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + assert(OVT.isVector() && "Don't know how to promote scalar logic ops"); + // Bit convert each of the values to the new type. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + // Bit convert the result back the original type. + Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1)); + break; + case ISD::SELECT: + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0)); + Results.push_back(Tmp1); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 8> Mask; + cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + // First step, figure out the appropriate operation to use. 
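An aside on the CTTZ/CTLZ/BSWAP promotions earlier in this function: for an i8 or i16 value computed in 32 bits after zero-extension, the required corrections look like this (a standalone sketch using C++20 <bit>, not part of the commit):

    #include <bit>
    #include <cstdint>

    // CTLZ promoted from i8 to i32: zero-extension adds 24 leading zeros,
    // so the wide count is reduced by NVT-bits minus OVT-bits (24 here).
    int ctlz8(uint8_t x)  { return std::countl_zero(uint32_t(x)) - 24; }

    // CTTZ promoted from i8 to i32: a zero input counts 32 trailing zeros
    // in the wide type, which the SETCC/SELECT pair remaps to 8.
    int cttz8(uint8_t x)  { return x == 0 ? 8 : std::countr_zero(uint32_t(x)); }

    // BSWAP promoted from i16 to i32: swap all four bytes, then shift the
    // result back down by the 16-bit width difference (the SRL by DiffBits).
    uint16_t bswap16(uint16_t x) {
      uint32_t w = x;
      w = (w << 24) | ((w << 8) & 0x00FF0000u) | ((w >> 8) & 0x0000FF00u) | (w >> 24);
      return uint16_t(w >> 16);
    }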
+ // Allow SETCC to not be supported for all legal data types + // Mostly this targets FP + MVT NewInTy = Node->getOperand(0).getValueType(); + MVT OldVT = NewInTy; OldVT = OldVT; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + + assert(NewInTy.isInteger() == OldVT.isInteger() && + "Fell off of the edge of the integer world"); + assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() && + "Fell off of the edge of the floating point world"); + + // If the target supports SETCC of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy)) + break; + } + if (NewInTy.isInteger()) + assert(0 && "Cannot promote Legal Integer SETCC yet"); + else { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2); + } + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + } +} + +// SelectionDAG::Legalize - This is the entry point for the file. +// +void SelectionDAG::Legalize(bool TypesNeedLegalizing, + CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); +} + diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 000000000000..c3c1beabd5f0 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,1388 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +using namespace llvm; + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(MVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Result Float to Integer Conversion. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to soften the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
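As the file header above puts it, softening recasts an illegal FP type as an integer of the same width and replaces the arithmetic with libcalls. A host-side analogy (illustrative only; soft_fadd32 is a hypothetical stand-in for the RTLIB::ADD_F32 libcall, which normally resolves to __addsf3):

    #include <cstdint>
    #include <cstring>

    // View an f32 as the i32 carrying the same bits (what
    // BitConvertToInteger produces at the DAG level).
    uint32_t soften_f32(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      return bits;
    }

    // A softened f32 addition is then an integer-typed runtime call.
    uint32_t soft_fadd32(uint32_t a, uint32_t b);   // hypothetical soft-float routine

    uint32_t softened_fadd(float a, float b) {
      return soft_fadd32(soften_f32(a), soften_f32(b));
    }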
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), + NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + MVT LVT = LHS.getValueType(); + MVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy())); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
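Stripped of the DAG plumbing, the sign transfer just built is the classic integer copysign; for two operands that are already the same width it reduces to (illustrative sketch, not part of the commit):

    #include <cstdint>

    // copysign on the bit patterns of two doubles (both already softened
    // to i64): the result has LHS's magnitude and RHS's sign.
    uint64_t copysign_bits64(uint64_t lhs, uint64_t rhs) {
      const uint64_t SignBit = 1ULL << 63;
      return (lhs & (SignBit - 1))   // clear the sign bit of the first operand
           | (rhs & SignBit);        // keep only the sign bit of the second
    }

The SRL/SHL adjustments above handle the case where the two softened operands have different widths, by first moving the sign bit into the right position.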
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + 
RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + 
RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), + NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, + L->getMemoryVT(), L->getChain(), + L->getBasePtr(), L->getOffset(), + L->getSrcValue(), L->getSrcValueOffset(), + L->getMemoryVT(), + L->isVolatile(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + return NewVAARG; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + MVT SVT = N->getOperand(0).getValueType(); + MVT RVT = N->getValueType(0); + MVT NVT = MVT(); + DebugLoc dl = N->getDebugLoc(); + + // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to + // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly + // match. Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE; + t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) { + NVT = (MVT::SimpleValueType)t; + // The source needs to big enough to hold the operand. + if (NVT.bitsGE(SVT)) + LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT):RTLIB::getUINTTOFP (NVT, RVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + // Sign/zero extend the argument if the libcall takes a larger type. + SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, N->getOperand(0)); + return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl); +} + + +//===----------------------------------------------------------------------===// +// Operand Float to Integer Conversion.. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to soften this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl) { + SDValue LHSInt = GetSoftenedFloat(NewLHS); + SDValue RHSInt = GetSoftenedFloat(NewRHS); + MVT VT = NewLHS.getValueType(); + + assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + default: assert(false && "Do not know how to soften this setcc!"); + } + } + + MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + SDValue Ops[2] = { LHSInt, RHSInt }; + NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewRHS = DAG.getConstant(0, RetVT); + CCCode = TLI.getCmpLibcallCC(LC1); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, + NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); + } +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + GetSoftenedFloat(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + MVT SVT = N->getOperand(0).getValueType(); + MVT RVT = N->getValueType(0); + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
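Concretely, SoftenSetCCOperands turns an FP comparison into one or two integer-returning runtime comparisons whose results are tested against zero. For an unordered-or-equal (SETUEQ) f32 compare, the emitted shape corresponds to the following sketch (not part of the commit; __unordsf2 and __eqsf2 are the usual libgcc/compiler-rt comparison routines these RTLIB entries map to):

    // f32 SETUEQ after softening: "unordered(a, b) || a == b", built from
    // two libcalls whose integer results are each compared against zero.
    extern "C" int __unordsf2(float a, float b);  // nonzero if either is NaN
    extern "C" int __eqsf2(float a, float b);     // zero iff ordered and equal

    bool setueq_f32(float a, float b) {
      return __unordsf2(a, b) != 0 || __eqsf2(a, b) == 0;
    }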
+ return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { + MVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { + MVT RVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If SoftenSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + DebugLoc dl = N->getDebugLoc(); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. + Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->isVolatile(), ST->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Float Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. 
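For ppcf128 this expansion models one value as a pair of f64s whose sum is the number. A small illustration of the representation and of the FABS rule used by ExpandFloatRes_FABS below (an editorial sketch, not part of the commit):

    #include <cmath>

    // ppcf128 ("double-double"): the value is Hi + Lo, where Hi is the f64
    // nearest the full value and Lo carries the remaining rounding error.
    struct DoubleDouble { double Hi, Lo; };

    // FABS on the pair: take |Hi|, and flip Lo only if Hi's sign changed,
    // mirroring the SELECT_CC in ExpandFloatRes_FABS.
    DoubleDouble dd_fabs(DoubleDouble x) {
      DoubleDouble r;
      r.Hi = std::fabs(x.Hi);
      r.Lo = (r.Hi == x.Hi) ? x.Lo : -x.Lo;
      return r;
    }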
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; + case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; + case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; + case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[1])), NVT); + Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1, + &C.getRawData()[0])), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + DebugLoc dl = N->getDebugLoc(); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + 
RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32,RTLIB::LOG10_F64, + RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + DebugLoc dl = N->getDebugLoc(); + + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Src = N->getOperand(0); + MVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. 
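+  // Each table below is the ppcf128 bit pattern of 2^N: one double word holds
+  // 2^N exactly (0x41f... = 2^32, 0x43f... = 2^64, 0x47f... = 2^128) and the
+  // other double word is zero.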
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + const uint64_t *Parts = 0; + + switch (SrcVT.getSimpleVT()) { + default: + assert(false && "Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) + == TargetLowering::Custom) + Res = TLI.LowerOperation(SDValue(N, 0), DAG); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. 
We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + MVT RVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APInt(128, 2, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. 
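+    // Tmp is 2^31 as a ppcf128 (0x41e0000000000000 is the double bit pattern
+    // of 2^31).  Values below 2^31 convert directly with FP_TO_SINT; values
+    // >= 2^31 are rebased by 2^31 first and 0x80000000 is added back.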
+ return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + DAG.getCondCode(ISD::SETGE)); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getMemoryVT(), + ST->isVolatile(), ST->getAlignment()); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 000000000000..eb9342cc8b8e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,2382 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer type expansion and promotion for LegalizeTypes. 
+// Promotion is the act of changing a computation in an illegal type into a +// computation in a larger type. For example, implementing i8 arithmetic in an +// i32 register (often needed on powerpc). +// Expansion is the act of changing a computation in an illegal type into a +// computation in two identical registers of a smaller type. For example, +// implementing i64 arithmetic in two i32 registers (often needed on 32-bit +// targets). +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Integer Result Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerResult - This method is called when a result of a node is +/// found to be in need of promotion to a larger type. At this point, the node +/// may also have invalid operands or may have other results that need +/// expansion, we just know that (at least) one result needs promotion. +void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to promote this operator!"); + abort(); + case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; + case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; + case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; + case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; + case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::EXTRACT_VECTOR_ELT: + Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + + case ISD::SADDO: + case 
ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getSrcValue(), N->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT NInVT = TLI.getTypeToTransformTo(InVT); + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + if (NOutVT.bitsEq(NInVT)) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BIT_CONVERT, dl, + NOutVT, GetPromotedInteger(InOp)); + break; + case SoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case ExpandInteger: + case ExpandFloat: + break; + case ScalarizeVector: + // Convert the element to an integer and promote it by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + case SplitVector: { + // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. 
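+    // Each half is bit-converted to an integer, the halves are joined in
+    // endian order, and the result is extended to the promoted output width.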
+ SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + } + case WidenVector: + if (OutVT.bitsEq(NInVT)) + // The input is widened to the same size. Convert to the widened value. + return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + } + + // Otherwise, lower the bit-convert to a store/load from the stack. + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is an extending load from the stack slot. + return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, TLI.getPointerTy())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + JoinIntegers(N->getOperand(0), N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + MVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + DebugLoc dl = N->getDebugLoc(); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. 
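+  // For example, promoting an i8 count to i32: the zero-extended value has 24
+  // extra leading zero bits, so 32 - 8 = 24 is subtracted from the i32 result.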
+ return DAG.getNode(ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + MVT OVT = N->getValueType(0); + MVT NVT = Op.getValueType(); + DebugLoc dl = N->getDebugLoc(); + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.set(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + return DAG.getNode(ISD::CTTZ, dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + MVT OldVT = N->getValueType(0); + SDValue OldVec = N->getOperand(0); + if (getTypeAction(OldVec.getValueType()) == WidenVector) + OldVec = GetWidenedVector(N->getOperand(0)); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + if (OldElts == 1) { + assert(!isTypeLegal(OldVec.getValueType()) && + "Legal one-element vector of a type needing promotion!"); + // It is tempting to follow GetScalarizedVector by a call to + // GetPromotedInteger, but this would be wrong because the + // scalarized value may not yet have been processed. + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), + GetScalarizedVector(OldVec)); + } + + // Convert to a vector half as long with an element type of twice the width, + // for example <4 x i16> -> <2 x i32>. + assert(!(OldElts & 1) && "Odd length vectors not supported!"); + MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); + assert(OldVT.isSimple() && NewVT.isSimple()); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, OldElts / 2), + OldVec); + + // Extract the element at OldIdx / 2 from the new vector. + SDValue OldIdx = N->getOperand(1); + SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, + DAG.getConstant(1, TLI.getPointerTy())); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); + + // Select the appropriate half of the element: Lo if OldIdx was even, + // Hi if it was odd. + SDValue Lo = Elt; + SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, + DAG.getConstant(OldVT.getSizeInBits(), + TLI.getPointerTy())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Extend to the promoted type. + SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); + SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); + return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size, check to see if the new node + // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since + // we can use that instead. This allows us to generate better code for + // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not + // legal, such as PowerPC. 
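+  // This is safe because the promoted type is strictly wider, so every value
+  // of the original unsigned type is representable in the signed promoted type.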
+ if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + ISD::AssertZext : ISD::AssertSext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT(), N->isVolatile(), + N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); + MVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. 
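+  // For example, for i8 promoted to i32: add/sub the sign-extended operands in
+  // i32, then report overflow if sign_extend_inreg(Res, i8) differs from Res.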
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + return DAG.getNode(ISD::SHL, N->getDebugLoc(), + TLI.getTypeToTransformTo(N->getValueType(0)), + GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. 
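+  // An arithmetic shift fills from the sign bit, so the promoted value's extra
+  // high bits must be copies of the original sign bit for the result to match.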
+ SDValue Res = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), + Res.getValueType(), Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res; + + switch (getTypeAction(N->getOperand(0).getValueType())) { + default: assert(0 && "Unknown type action!"); + case Legal: + case ExpandInteger: + Res = N->getOperand(0); + break; + case PromoteInteger: + Res = GetPromotedInteger(N->getOperand(0)); + break; + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + MVT RegVT = TLI.getRegisterType(VT); + unsigned NumRegs = TLI.getNumRegisters(VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. 
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), + TLI.getPointerTy())); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + assert(ResNo == 1 && "Only boolean result promotion currently supported!"); + return PromoteIntRes_Overflow(N); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to promote this operator's operand!"); + abort(); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. 
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. Note that we could + // insert sign extends for ALL conditions, but zero extend is cheaper on + // many machines (an AND instead of two shifts), so prefer it. + switch (CCCode) { + default: assert(0 && "Unknown integer comparison!"); + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: + // ALL of these operations will work if we either sign or zero extend + // the operands (including the unsigned comparisons!). Zero extend is + // usually a simpler/cheaper operation, so prefer it. + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + break; + case ISD::SETGE: + case ISD::SETGT: + case ISD::SETLT: + case ISD::SETLE: + NewLHS = SExtPromotedInteger(NewLHS); + NewRHS = SExtPromotedInteger(NewRHS); + break; + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { + // This should only occur in unusual situations like bitcasting to an + // x86_fp80, so just turn it into a store+load + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get()); + + // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always + // legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + MVT SVT = TLI.getSetCCResultType(MVT::Other); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + + // The chain (Op#0) and basic block destination (Op#2) are always legal types. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, + N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { + // Since the result type is legal, the operands must promote to it. + MVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + DebugLoc dl = N->getDebugLoc(); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. 
Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && + "can only promote integer arguments"); + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + N->getOperand(1), Idx); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { + SDValue NewOps[6]; + DebugLoc dl = N->getDebugLoc(); + NewOps[0] = N->getOperand(0); + for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { + SDValue Flag = GetPromotedInteger(N->getOperand(i)); + NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); + } + return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, + array_lengthof(NewOps)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return DAG.UpdateNodeOperands(SDValue(N, 0), + GetPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote condition"); + + // Promote all the way up to the canonical SetCC type. + MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); + + return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. 
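+  // Only the comparison operands (#0 and #1) needed promotion here.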
+ return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + DebugLoc dl = N->getDebugLoc(); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + SExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), + SVOffset, N->getMemoryVT(), + isVolatile, Alignment); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return DAG.UpdateNodeOperands(SDValue(N, 0), + ZExtPromotedInteger(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to expand the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. 
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + MVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + MVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, dl, + NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, dl, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + MVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
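+  // Strategy sketch: both candidate results are built unconditionally below,
+  // one assuming Amt < NVTBits and one assuming Amt >= NVTBits, and a
+  // SETULT(Amt, NVTBits) comparison selects between them for each half.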
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue Lo1, Hi1, Lo2, Hi2; + switch (N->getOpcode()) { + default: assert(0 && "Unknown shift"); + case ISD::SHL: + // ShAmt < NVTBits + Lo1 = DAG.getConstant(0, NVT); // Low part is zero. + Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + + // ShAmt >= NVTBits + Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + Hi2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRL: + // ShAmt < NVTBits + Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. + Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + case ISD::SRA: + // ShAmt < NVTBits + Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + + // ShAmt >= NVTBits + Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + Lo2 = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + MVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Flag, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + } else { + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. 
+ } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); + Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + DAG.getConstant(0, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); + Hi = DAG.getConstant(0, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + MVT EVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + EVT, isVolatile, Alignment); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. 
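+      // E.g. with i32 parts, Hi = Lo >>s 31, replicating the sign bit of the
+      // loaded value across the whole high word.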
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + MVT NVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
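+  // E.g. expanding an i64 multiply when neither MULH* nor *MUL_LOHI is
+  // available selects MUL_I64, which typically resolves to the __muldi3
+  // routine of the compiler runtime (the exact name depends on the runtime).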
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. + MVT NVT = TLI.getTypeToTransformTo(VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + + SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; + MVT VT = LHSL.getValueType(); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. 
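+  // E.g. a variable i64 shift with no usable SHL/SRA/SRL_PARTS support
+  // becomes a SHL_I64/SRA_I64/SRL_I64 libcall, commonly named
+  // __ashldi3/__ashrdi3/__lshrdi3 in libgcc-style runtimes (names vary).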
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + assert(0 && "Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. 
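+    // E.g. sext_inreg from i48 of a value expanded into two i32 halves:
+    // ExcessBits = 48 - 32 = 16, so only the top half is sign-extended in
+    // register from its low 16 bits; the bottom half is already correct.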
+ unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. 
+ SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + } +} + + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; + #endif + assert(0 && "Do not know how to expand this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + MVT VT = NewLHS.getValueType(); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. 
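+        // Both halves are all-ones iff their AND is all-ones, so comparing
+        // (LHSLo & LHSHi) against the all-ones low half avoids the XOR/OR
+        // sequence used for the general equality case below.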
+ NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. + ISD::CondCode LowCC; + switch (CCCode) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETLT: + case ISD::SETULT: LowCC = ISD::SETULT; break; + case ISD::SETGT: + case ISD::SETUGT: LowCC = ISD::SETUGT; break; + case ISD::SETLE: + case ISD::SETULE: LowCC = ISD::SETULE; break; + case ISD::SETGE: + case ISD::SETUGE: LowCC = ISD::SETUGE; break; + } + + // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison + // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + + // NOTE: on targets without efficient SELECT of bools, we can always use + // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + SDValue Tmp1, Tmp2; + Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (!Tmp1.getNode()) + Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC); + Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (!Tmp2.getNode()) + Tmp2 = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); + ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode()); + if ((Tmp1C && Tmp1C->isNullValue()) || + (Tmp2C && Tmp2C->isNullValue() && + (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || + (Tmp2C && Tmp2C->getAPIntValue() == 1 && + (CCCode == ISD::SETLT || CCCode == ISD::SETGT || + CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { + // low part is known false, returns high part. + // For LE / GE, if high part is known false, ignore the low part. + // For LT / GT, if high part is known true, ignore the low part. + NewLHS = Tmp2; + NewRHS = SDValue(); + return; + } + + NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ, false, + DagCombineInfo, dl); + if (!NewLHS.getNode()) + NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); + NewRHS = SDValue(); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. 
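+  // This happens when the helper above already materialized a full
+  // setcc/select sequence for the two halves and handed back a bare boolean;
+  // rewriting it as (bool != 0) lets the BR_CC node be reused unchanged.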
+ if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (NewRHS.getNode() == 0) { + NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If ExpandSetCCOperands returned a scalar, use it. + if (NewRHS.getNode() == 0) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + DAG.getCondCode(CCCode)); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { + // The value being shifted is legal, but the shift amount is too big. + // It follows that either the result of the shift is undefined, or the + // upper half of the shift amount is zero. Just use the lower half. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(1), Lo, Hi); + return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT DstVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this SINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + + MVT VT = N->getOperand(1).getValueType(); + MVT NVT = TLI.getTypeToTransformTo(VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + GetExpandedInteger(N->getValue(), Lo, Hi); + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + N->getMemoryVT(), isVolatile, Alignment); + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. 
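+    // E.g. storing an expanded value to an i48 memory type with i32 parts:
+    // the low word goes out as a plain store at Ptr, and the remaining
+    // ExcessBits = 16 bits of the high word as a truncating store at Ptr+4.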
+ GetExpandedInteger(N->getValue(), Lo, Hi); + + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVolatile, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + MVT NEVT = MVT::getIntegerVT(ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, NEVT, + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } else { + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + MVT EVT = N->getMemoryVT(); + unsigned EBytes = EVT.getStoreSizeInBits()/8; + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } + + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), + SVOffset, HiVT, isVolatile, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), + SVOffset+IncrementSize, + MVT::getIntegerVT(ExcessBits), + isVolatile, MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } +} + +SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + // Just truncate the low part of the source. + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT SrcVT = Op.getValueType(); + MVT DstVT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // Do a signed conversion then adjust the result. + SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); + SignedConv = TLI.LowerOperation(SignedConv, DAG); + + // The result of the signed conversion needs adjusting if the 'sign bit' of + // the incoming integer was set. To handle this, we dynamically test to see + // if it is set, and, if so, add a fudge factor. + + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + assert(false && "Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. 
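+    // If it was set, the signed conversion interpreted the input as negative
+    // and its result is low by exactly 2^SrcBits; the constants above are the
+    // f32 bit patterns used to add that power of two back in (2^32 for i32,
+    // 2^64 for i64, and the corresponding pattern for i128).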
+ SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + TLI.getSetCCResultType(Hi.getValueType()), + Hi, DAG.getConstant(0, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + TLI.getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + if (TLI.isBigEndian()) std::swap(Zero, Four); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, NULL, 0, MVT::f32, + false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return MakeLibCall(LC, DstVT, &Op, 1, true, dl); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 000000000000..00d71e1a4fe9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1074 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CallingConv.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. 
This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes. By iterating + // over the DAG we only consider non-deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (I->getNodeId() == NewNode) + NewNodes.push_back(I); + + for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { + SDValue Res(I, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. 
+ SDValue NewVal = ReplacedValues[Res]; + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); + while (I != ReplacedValues.end()) { + NewVal = I->second; + I = ReplacedValues.find(NewVal); + } + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); + } + if (PromotedIntegers.find(Res) != PromotedIntegers.end()) + Mapped |= 2; + if (SoftenedFloats.find(Res) != SoftenedFloats.end()) + Mapped |= 4; + if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) + Mapped |= 8; + if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) + Mapped |= 16; + if (ExpandedFloats.find(Res) != ExpandedFloats.end()) + Mapped |= 32; + if (SplitVectors.find(Res) != SplitVectors.end()) + Mapped |= 64; + if (WidenedVectors.find(Res) != WidenedVectors.end()) + Mapped |= 128; + + if (I->getNodeId() != Processed) { + if (Mapped != 0) { + cerr << "Unprocessed value in a map!"; + Failed = true; + } + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + if (Mapped > 1) { + cerr << "Value with legal type was transformed!"; + Failed = true; + } + } else { + if (Mapped == 0) { + cerr << "Processed value not in any map!"; + Failed = true; + } else if (Mapped & (Mapped - 1)) { + cerr << "Value in multiple maps!"; + Failed = true; + } + } + + if (Failed) { + if (Mapped & 1) + cerr << " ReplacedValues"; + if (Mapped & 2) + cerr << " PromotedIntegers"; + if (Mapped & 4) + cerr << " SoftenedFloats"; + if (Mapped & 8) + cerr << " ScalarizedVectors"; + if (Mapped & 16) + cerr << " ExpandedIntegers"; + if (Mapped & 32) + cerr << " ExpandedFloats"; + if (Mapped & 64) + cerr << " SplitVectors"; + if (Mapped & 128) + cerr << " WidenedVectors"; + cerr << "\n"; + abort(); + } + } + } + + // Checked that NewNodes are only used by other NewNodes. + for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { + SDNode *N = NewNodes[i]; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); + } +} + +/// run - This is the main entry point for the type legalizer. This does a +/// top-down traversal of the dag, legalizing types as it goes. Returns "true" +/// if it made any changes. +bool DAGTypeLegalizer::run() { + bool Changed = false; + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + Dummy.setNodeId(Unanalyzed); + + // The root of the dag may dangle to deleted nodes until the type legalizer is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' + // (and remembering them) if they are leaves and assigning 'Unanalyzed' if + // non-leaves. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + if (I->getNumOperands() == 0) { + I->setNodeId(ReadyToProcess); + Worklist.push_back(I); + } else { + I->setNodeId(Unanalyzed); + } + } + + // Now that we have a set of nodes to process, handle them all. 
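+  // The NodeId of an unanalyzed node acts as a countdown of operands that are
+  // not yet legalized: for example, a node X = ADD(A, B) starts out
+  // Unanalyzed, drops to 1 once A is processed, and reaches 0
+  // (ReadyToProcess) once B is processed, at which point X joins the worklist.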
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + MVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + MVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. However + // there is no need to remember the replacement - morphing will make sure + // it is never used non-trivially. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. Do the + // replacement without recording it in ReplacedValues. This does not + // expunge From but that is fine - it is not really a new node. + ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + cerr << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+ for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(I->getOperand(i).getNode()) && + !isTypeLegal(I->getOperand(i).getValueType())) { + cerr << "Operand type " << i << " illegal!\n"; + Failed = true; + } + + if (I->getNodeId() != Processed) { + if (I->getNodeId() == NewNode) + cerr << "New node not analyzed?\n"; + else if (I->getNodeId() == Unanalyzed) + cerr << "Unanalyzed node not noticed?\n"; + else if (I->getNodeId() > 0) + cerr << "Operand not processed?\n"; + else if (I->getNodeId() == ReadyToProcess) + cerr << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + I->dump(&DAG); cerr << "\n"; + abort(); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. + // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + for (unsigned j = 0; j < i; ++j) + NewOps.push_back(N->getOperand(j)); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], + NewOps.size()).getNode(); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. 
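+  // The id becomes the number of operands that are still awaiting
+  // legalization; if that count is zero the node is ReadyToProcess and can be
+  // queued immediately.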
+ N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
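+  // The loops below remap the target value of every map entry, so that no
+  // entry is left pointing at one of N's values, and then erase N's values
+  // from ReplacedValues itself.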
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!"); + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class VISIBILITY_HIDDEN NodeUpdateListener : + public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : DTL(dtl), NodesToAnalyze(nta) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. 
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode) + NodesToAnalyze.insert(E); + } + + virtual void NodeUpdated(SDNode *N) { + // Node updates can mean pretty much anything. It is possible that an + // operand was set to something already processed (f.e.) in which case + // this node could become ready. Recompute its flags. + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + N->setNodeId(DAGTypeLegalizer::NewNode); + NodesToAnalyze.insert(N); + } + }; +} + + +/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. Updates the +/// DAG causing any uses of From to use To instead, but without expunging From +/// or recording the replacement in ReplacedValues. Do not call directly unless +/// you really know what you are doing! +void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { + assert(From.getNode() != To.getNode() && "Potential legalization loop!"); + + // If expansion produced new nodes, make sure they are properly marked. + AnalyzeNewValue(To); // Expunges To. + + // Anything that used the old node should now use the new one. Note that this + // can potentially cause recursive merging. + SmallSetVector<SDNode*, 16> NodesToAnalyze; + NodeUpdateListener NUL(*this, NodesToAnalyze); + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe to + // skip. Note that this is not a morphing node - otherwise it would still + // be marked NewNode. + continue; + + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new node + // instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. + } + } +} + +/// ReplaceValueWith - The specified value was legalized to the specified other +/// value. Update the DAG and NodeIds replacing any uses of From to use To +/// instead. +void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { + assert(From.getNode()->getNodeId() == ReadyToProcess && + "Only the node being processed may be remapped!"); + + // If expansion produced new nodes, make sure they are properly marked. + ExpungeNode(From.getNode()); + AnalyzeNewValue(To); // Expunges To. + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Do the replacement. 
+ ReplaceValueWithHelper(From, To); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(Entry.first.getNode() == 0 && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(Entry.first.getNode() == 0 && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node already widened!"); + OpEntry = Result; +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. 
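+/// For example, an f32 value is reinterpreted as the i32 with the same bit
+/// pattern, and a v2f32 as an i64.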
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { + unsigned BitWidth = Op.getValueType().getSizeInBits(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + MVT::getIntegerVT(BitWidth), Op); +} + +/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the +/// same size. +SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { + assert(Op.getValueType().isVector() && "Only applies to vectors!"); + unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + MVT EltNVT = MVT::getIntegerVT(EltWidth); + unsigned NumElts = Op.getValueType().getVectorNumElements(); + return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + MVT::getVectorVT(EltNVT, NumElts), Op); +} + +SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, + MVT DestVT) { + DebugLoc dl = Op.getDebugLoc(); + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + // Result is a load from the stack slot. + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); +} + +/// CustomLowerNode - Replace the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +/// The last parameter is FALSE if we are dealing with a node with legal +/// result types and illegal operand. The second parameter denotes the type of +/// illegal OperandNo in that case. +/// The last parameter being TRUE means we are dealing with a +/// node with illegal result types. The second parameter denotes the type of +/// illegal ResNo in that case. +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + if (LegalizeResult) + TLI.ReplaceNodeResults(N, Results, DAG); + else + TLI.LowerOperationWrapper(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom lower it after all. + return false; + + // Make everything that once used N's values now use those in Results instead. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + ReplaceValueWith(SDValue(N, i), Results[i]); + return true; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split into two not necessarily identical pieces. +void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { + if (!InVT.isVector()) { + LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + } else { + MVT NewEltVT = InVT.getVectorElementType(); + unsigned NumElements = InVT.getVectorNumElements(); + if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. + NumElements >>= 1; + LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); + } else { // Non-power-of-two vectors. + unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); + unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; + LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); + HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); + } + } +} + +/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and +/// high parts of the given value. 
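+/// For example, if Pair is an i64 that the target transforms to i32, this
+/// returns its low and high i32 halves.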
+void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Pair.getDebugLoc(); + MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, + SDValue Index) { + DebugLoc dl = Index.getDebugLoc(); + // Make sure the index type is big enough to compute in. + if (Index.getValueType().bitsGT(TLI.getPointerTy())) + Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result DebugLoc + DebugLoc dlHi = Hi.getDebugLoc(); + DebugLoc dlLo = Lo.getDebugLoc(); + MVT LVT = Lo.getValueType(); + MVT HVT = Hi.getValueType(); + MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); + if (NumOps == 0) { + return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); +} + +/// MakeLibCall - Generate a libcall taking the given operands as arguments and +/// returning a result of type RetVT. +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForMVT(); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, CallingConv::C, false, Callee, Args, DAG, dl); + return CallInfo.first; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. 
A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { + DebugLoc dl = Bool.getDebugLoc(); + ISD::NodeType ExtendCode; + switch (TLI.getBooleanContents()) { + default: + assert(false && "Unknown BooleanContent!"); + case TargetLowering::UndefinedBooleanContent: + // Extend to VT by adding rubbish bits. + ExtendCode = ISD::ANY_EXTEND; + break; + case TargetLowering::ZeroOrOneBooleanContent: + // Extend to VT by adding zero bits. + ExtendCode = ISD::ZERO_EXTEND; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: { + // Extend to VT by copying the sign bit. + ExtendCode = ISD::SIGN_EXTEND; + break; + } + } + return DAG.getNode(ExtendCode, dl, VT, Bool); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 000000000000..75c89246a31e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,736 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAG_LEGALIZETYPES_H +#define SELECTIONDAG_LEGALIZETYPES_H + +#define DEBUG_TYPE "legalize-types" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class VISIBILITY_HIDDEN DAGTypeLegalizer { + TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + enum LegalizeAction { + Legal, // The target natively supports this type. + PromoteInteger, // Replace this integer type with a larger one. + ExpandInteger, // Split this integer type into two of half the size. + SoftenFloat, // Convert this float type to a same size integer type. + ExpandFloat, // Split this float type into two of half the size. + ScalarizeVector, // Replace this one-element vector with its element type. + SplitVector, // This vector type should be split into smaller vectors. + WidenVector // This vector type should be widened into a larger vector. + }; + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// simple value type, where the two bits correspond to the LegalizeAction + /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// getTypeAction - Return how we should legalize values of this type. + LegalizeAction getTypeAction(MVT VT) const { + switch (ValueTypeActions.getTypeAction(VT)) { + default: + assert(false && "Unknown legalize action!"); + case TargetLowering::Legal: + return Legal; + case TargetLowering::Promote: + // Promote can mean + // 1) For integers, use a larger integer type (e.g. i8 -> i32). + // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). + if (!VT.isVector()) + return PromoteInteger; + else + return WidenVector; + case TargetLowering::Expand: + // Expand can mean + // 1) split scalar in half, 2) convert a float to an integer, + // 3) scalarize a single-element vector, 4) split a vector in two. 
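+      // For example: i64 may become two i32 halves, f64 may become a
+      // same-sized i64 on a target with no hardware floating point, v1i32
+      // becomes i32, and v8i32 may become two v4i32 halves.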
+ if (!VT.isVector()) { + if (VT.isInteger()) + return ExpandInteger; + else if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(VT).getSizeInBits()) + return SoftenFloat; + else + return ExpandFloat; + } else if (VT.getVectorNumElements() == 1) { + return ScalarizeVector; + } else { + return SplitVector; + } + } + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(MVT VT) const { + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + DenseMap<SDValue, SDValue> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + DenseMap<SDValue, SDValue> SoftenedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + DenseMap<SDValue, SDValue> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + DenseMap<SDValue, SDValue> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + DenseMap<SDValue, SDValue> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. 
+ SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); + bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + const SDValue *Ops, unsigned NumOps, bool isSigned, + DebugLoc dl); + SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + void ReplaceValueWith(SDValue From, SDValue To); + void ReplaceValueWithHelper(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + MVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. 
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. + bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. 
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. + void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_ShiftOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + + SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into smaller + /// vectors, this method returns the smaller vectors. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
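+ // SplitVectorOperand dispatches on the node's opcode to one of the
+ // SplitVecOp_* helpers below, which rewrite the node in terms of the two
+ // halves of its split operand.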
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+ SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GetWidenedVector - Given a processed vector Op which was widened into a
+ /// larger vector, this method returns the larger vector. The elements of
+ /// the returned vector consist of the elements of Op followed by elements
+ /// containing rubbish. For example, if Op is a v2i32 that was widened to a
+ /// v4i32, then this method returns a v4i32 for which the first two elements
+ /// are the same as those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// GenWidenVectorLoads - Helper function to generate a set of loads to
+ /// load a vector with a resulting wider type. It takes
+ /// LdChain: list of chains for the loads we have generated.
+ /// Chain: incoming chain for the load.
+ /// BasePtr: base pointer to load from.
+ /// SV: memory disambiguation source value.
+ /// SVOffset: memory disambiguation offset.
+ /// Alignment: alignment of the memory.
+ /// isVolatile: volatile load.
+ /// LdWidth: width of memory that we want to load.
+ /// ResType: the wider result type for the resulting vector.
+ /// dl: DebugLoc to be applied to new nodes.
+ SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, unsigned LdWidth,
+ MVT ResType, DebugLoc dl);
+
+ /// GenWidenVectorStores - Helper function to generate a set of stores to
+ /// store a widened vector into non-widened memory. It takes
+ /// StChain: list of chains for the stores we have generated.
+ /// Chain: incoming chain for the stores.
+ /// BasePtr: base pointer to store to.
+ /// SV: memory disambiguation source value.
+ /// SVOffset: memory disambiguation offset.
+ /// Alignment: alignment of the memory.
+ /// isVolatile: volatile store.
+ /// ValOp: value to store.
+ /// StWidth: width of memory that we want to store.
+ /// dl: DebugLoc to be applied to new nodes.
+ void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
+ SDValue BasePtr, const Value *SV,
+ int SVOffset, unsigned Alignment,
+ bool isVolatile, SDValue ValOp,
+ unsigned StWidth, DebugLoc dl);
+
+ /// Modifies a vector input (widens or narrows) to a vector of WidenVT. The
+ /// input vector must have the same element type as WidenVT.
+ SDValue ModifyToType(SDValue InOp, MVT WidenVT);
+
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // not necessarily identical types. As such they can be used for splitting
+ // vectors and expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+ /// which is split (or expanded) into two not necessarily identical pieces.
+ void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT);
+
+ /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+ /// high parts of the given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which only use that the illegal type is split into two
+ // identical types of half the size, and that the Lo/Hi part is stored first
+ // in memory on little/big-endian machines, followed by the Hi/Lo part. As
+ // such they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ // Generic Result Expansion.
+ void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 000000000000..e8ff3fc9efb4 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,453 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT OutVT = N->getValueType(0); + MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case SplitVector: + // Convert the split parts of the input if it was split in two. 
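+ // This shortcut only applies when both halves have the same type;
+ // otherwise fall through to the generic stack store/load lowering below.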
+ GetSplitVector(InOp, Lo, Hi); + if (Lo.getValueType() == Hi.getValueType()) { + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + break; + case ScalarizeVector: + // Convert the element instead. + SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + case WidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + InOp = GetWidenedVector(InOp); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + InVT.getVectorNumElements()/2); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the second half from the stack slot. + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + MinAlign(Alignment, IncrementSize)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Return the operands. + Lo = N->getOperand(0); + Hi = N->getOperand(1); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + GetExpandedOp(N->getOperand(0), Lo, Hi); + SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? + Hi : Lo; + + assert(Part.getValueType() == N->getValueType(0) && + "Type twice as big as expanded type not itself expanded!"); + + GetPairElements(Part, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue OldVec = N->getOperand(0); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + // Convert to a vector of the expanded element type, for example + // <3 x i64> -> <6 x i32>. + MVT OldVT = N->getValueType(0); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getVectorVT(NewVT, 2*OldElts), + OldVec); + + // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. 
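+ // For example, extracting element 1 of a <3 x i64> becomes extracting
+ // elements 2 and 3 of the <6 x i32>, giving the Lo and Hi halves
+ // (swapped on big-endian targets).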
+ SDValue Idx = N->getOperand(1); + + // Make sure the type of Idx is big enough to hold the new values. + if (Idx.getValueType().bitsLT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + DebugLoc dl = N->getDebugLoc(); + + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + isVolatile, Alignment); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), + SVOffset+IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + + // Handle endianness of the load. + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. +//===--------------------------------------------------------------------===// + +SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. 
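+ // The expanded Lo/Hi parts become the two elements of the new vector
+ // (in the opposite order on big-endian targets), and the result is then
+ // bit-converted back to the original result type.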
+ MVT OVT = N->getOperand(0).getValueType(); + MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + + if (isTypeLegal(NVT)) { + SDValue Parts[2]; + GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); + return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + } + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + MVT OldVT = N->getOperand(0).getValueType(); + MVT NewVT = TLI.getTypeToTransformTo(OldVT); + DebugLoc dl = N->getDebugLoc(); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::getVectorVT(NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. + MVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + DebugLoc dl = N->getDebugLoc(); + + SDValue Val = N->getOperand(1); + MVT OldEVT = Val.getValueType(); + MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. 
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + MVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + DebugLoc dl = N->getDebugLoc(); + + StoreSDNode *St = cast<StoreSDNode>(N); + MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + int SVOffset = St->getSrcValueOffset(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + isVolatile, Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), + SVOffset + IncrementSize, + isVolatile, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. +//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). + +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // A MERGE_VALUES node can produce any number of values. We know that the + // first illegal one needs to be expanded into Lo/Hi. + unsigned i; + + // The string of legal results gets turned into input operands, which have + // the same type. + for (i = 0; isTypeLegal(N->getValueType(i)); ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + + // The first illegal result must be the one that needs to be expanded. + GetSplitOp(N->getOperand(i), Lo, Hi); + + // Legalize the rest of the results into the input operands whether they are + // legal or not. 
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 000000000000..df9af2147ca5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,335 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered like +// ISD::CALL; the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + TargetLowering& TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
+ DenseMap<SDValue, SDValue> LegalizedNodes; + + // Adds a node to the translation cache + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + + // Legalizes the given node + SDValue LegalizeOp(SDValue Op); + // Assuming the node is legal, "legalize" the results + SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + // Implements unrolling a generic vector operation, i.e. turning it into + // scalar operations. + SDValue UnrollVectorOp(SDValue Op); + // Implements unrolling a VSETCC. + SDValue UnrollVSETCC(SDValue Op); + // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB + // isn't legal. + SDValue ExpandFNEG(SDValue Op); + // Implements vector promotion; this is essentially just bitcasting the + // operands to a different type and bitcasting the result back to the + // original type. + SDValue PromoteVectorOp(SDValue Op); + + public: + bool Run(); + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} +}; + +bool VectorLegalizer::Run() { + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
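+ // If this node was already legalized, reuse the cached result.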
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + + SDValue Result = + DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + + bool HasVectorValue = false; + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::SELECT_CC: + case ISD::VSETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + case TargetLowering::Promote: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case TargetLowering::Legal: break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + if (Node->getOpcode() == ISD::FNEG) + Result = ExpandFNEG(Op); + else if (Node->getOpcode() == ISD::VSETCC) + Result = UnrollVSETCC(Op); + else + Result = UnrollVectorOp(Op); + break; + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { + // Vector "promotion" is basically just bitcasting and doing the operation + // in a different type. For example, x86 promotes ISD::AND on v2i32 to + // v1i64. 
+ MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); +} + +SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { + SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); + return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + Zero, Op.getOperand(0)); + } + return UnrollVectorOp(Op); +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + MVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getIntPtrConstant(i)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); +} + +/// UnrollVectorOp - We know that the given vector has a legal type, however +/// the operation it performs is not legal, and the target has requested that +/// the operation be expanded. "Unroll" the vector, splitting out the scalars +/// and operating on each element individually. +SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { + MVT VT = Op.getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + unsigned NE = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + for (unsigned i = 0; i != NE; ++i) { + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + SDValue Operand = Op.getOperand(j); + MVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + MVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + DAG.getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. 
+ Operands[j] = Operand; + } + } + + switch (Op.getOpcode()) { + default: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0], + DAG.getShiftAmountOperand(Operands[1]))); + break; + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size()); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 000000000000..68967cc638fd --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,2151 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in multiple vectors of a smaller type. For example, +// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + } + + // If R is null, the sub-method took care of registering the result. 
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, ShiftAmt); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + MVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + MVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + MVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. 
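+ // For example, a <1 x i8> built from an i32 operand only keeps the low
+ // 8 bits, so an explicit TRUNCATE to i8 is emitted.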
+ MVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + MVT NVT = N->getValueType(0).getVectorElementType(); + MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc dl = N->getDebugLoc(); + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + + // VSETCC always returns a sign-extended value, while SETCC may not. The + // SETCC result type may not match the vector element type. Correct these. + if (NVT.bitsLE(SVT)) { + // The SETCC result type is bigger than the vector element type. + // Ensure the SETCC result is sign-extended. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + DAG.getValueType(MVT::i1)); + // Truncate to the final type. + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); + } else { + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to scalarize this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: + Res = ScalarizeVecOp_BIT_CONVERT(N); break; + + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; + + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], Ops.size()); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + return GetScalarizedVector(N->getOperand(0)); +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. 
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + DebugLoc dl = N->getDebugLoc(); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), + N->getSrcValue(), N->getSrcValueOffset(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->getAlignment()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->isVolatile(), N->getAlignment()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + SDValue Lo, Hi; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split the result of this operator!"); + abort(); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FNEG: + case ISD::FABS: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + DebugLoc dl = N->getDebugLoc(); + + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + case SoftenFloat: + case ScalarizeVector: + break; + case ExpandInteger: + case ExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + break; + case SplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
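+ // For example, if the result is a v8i8 split into two v4i8 halves, the
+ // input is bit-converted to i64, split into two i32 pieces, and each
+ // piece is bit-converted to v4i8.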
+ MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); + MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); +} + +void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + // Split the input. + SDValue VLo, VHi; + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), VLo, VHi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); + SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); + + Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT IdxVT = Idx.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + MVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + // The indices are not guaranteed to be a multiple of the new vector + // size unless the original vector type was split in two. + assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, + DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getIntPtrConstant(IdxVal - LoNumElts)); + return; + } + + // Spill the vector to the stack. + MVT VecVT = Vec.getValueType(); + MVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + unsigned Alignment = + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getIntPtrConstant(IncrementSize)); + + // Load the Hi part from the stack slot. 
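Taken together, the stack-based path above is: spill the whole vector, overwrite one slot, then reload the two halves. A rough standalone model of that idea in plain C++; the four-element vector, the runtime index, and the inserted value are assumptions chosen only for illustration.

#include <array>
#include <cstdio>
#include <cstring>

int main() {
  std::array<int, 4> Vec = {10, 20, 30, 40};  // vector whose halves are being split
  int Elt = 99;                               // element being inserted
  unsigned Idx = 2;                           // runtime (non-constant) index

  // "Spill the vector to the stack."
  int Slot[4];
  std::memcpy(Slot, Vec.data(), sizeof(Slot));

  // "Store the new element" through the computed element pointer.
  Slot[Idx] = Elt;

  // Reload the Lo half, step the pointer, then reload the Hi half.
  int Lo[2], Hi[2];
  std::memcpy(Lo, Slot, sizeof(Lo));
  std::memcpy(Hi, Slot + 2, sizeof(Hi));

  std::printf("Lo = {%d, %d}  Hi = {%d, %d}\n", Lo[0], Lo[1], Hi[0], Hi[1]);
  return 0;
}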
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, + MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + MVT LoVT, HiVT; + DebugLoc dl = LD->getDebugLoc(); + GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const Value *SV = LD->getSrcValue(); + int SVOffset = LD->getSrcValueOffset(); + MVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + SV, SVOffset, LoMemVT, isVolatile, Alignment); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; + Alignment = MinAlign(Alignment, IncrementSize); + Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + SV, SVOffset, HiMemVT, isVolatile, Alignment); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + MVT InVT = N->getOperand(0).getValueType(); + switch (getTypeAction(InVT)) { + default: assert(0 && "Unexpected type action!"); + case Legal: { + assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + case SplitVector: + GetSplitVector(N->getOperand(0), Lo, Hi); + break; + case WidenVector: { + // If the result needs to be split and the input needs to be widened, + // the two types must have different lengths. Use the widened result + // and extract from it to do the split. 
+ assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(0)); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + break; + } + } + + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + MVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + assert(NewVT == Inputs[1].getValueType() && + "Non power-of-two vectors not supported!"); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + MVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. 
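The classification step shared by the shuffle loop above and this BUILD_VECTOR fallback, deciding which of the four half-inputs a mask element touches and remapping its index, can be exercised in isolation. The standalone sketch below uses an assumed 4-element mask and models the bookkeeping only, not the SDNode API.

#include <array>
#include <cstdio>

int main() {
  const unsigned NewElts = 4;              // elements per half-input
  // Mask for one output half; -1 would mean "undef" (values are assumed).
  std::array<int, 4> Mask = {0, 5, 1, 4};

  unsigned InputUsed[2] = {~0u, ~0u};      // which half-inputs feed the shuffle
  bool UseBuildVector = false;
  std::array<int, 4> Ops = {-1, -1, -1, -1};

  for (unsigned i = 0; i < Mask.size() && !UseBuildVector; ++i) {
    int Idx = Mask[i];
    unsigned Input = (unsigned)Idx / NewElts;    // which of the four inputs
    if (Input >= 4)                              // undef or off the end
      continue;

    unsigned OpNo = 0;                           // find or allocate an operand slot
    while (OpNo < 2 && InputUsed[OpNo] != Input && InputUsed[OpNo] != ~0u)
      ++OpNo;
    if (OpNo == 2) { UseBuildVector = true; break; }  // needs >2 inputs: give up
    InputUsed[OpNo] = Input;
    Ops[i] = Idx - (int)(Input * NewElts) + (int)(OpNo * NewElts);
  }

  std::printf("useBuildVector=%d  inputs={%d,%d}  mask={%d,%d,%d,%d}\n",
              UseBuildVector, (int)InputUsed[0], (int)InputUsed[1],
              Ops[0], Ops[1], Ops[2], Ops[3]);
  return 0;
}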
+ SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getIntPtrConstant(Idx))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + +void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + MVT LoVT, HiVT; + DebugLoc dl = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + SDValue LL, LH, RL, RH; + GetSplitVector(N->getOperand(0), LL, LH); + GetSplitVector(N->getOperand(1), RL, RH); + + Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + SDValue Res = SDValue(); + + if (Res.getNode() == 0) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to split this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. 
+ MVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc dl = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Returns legal non-power-of-two vector type?"); + MVT InVT = Lo.getValueType(); + + MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { + // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. For now, assume the index + // is a constant. + MVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + MVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); + else + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); + } + + // Store the vector to the stack. + MVT EltVT = VecVT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + + // Load back the required element. 
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + DebugLoc dl = N->getDebugLoc(); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + int SVOffset = N->getSrcValueOffset(); + MVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getAlignment(); + bool isVol = N->isVolatile(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + MVT LoMemVT, HiMemVT; + GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + LoMemVT, isVol, Alignment); + else + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getSrcValue(), SVOffset+IncrementSize, + HiMemVT, + isVol, MinAlign(Alignment, IncrementSize)); + else + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, + isVol, MinAlign(Alignment, IncrementSize)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen the result of this operator!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + + case ISD::ADD: + case ISD::AND: + case ISD::BSWAP: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FPOWI: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: + case ISD::SUB: + case ISD::XOR: Res = WidenVecRes_Binary(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: Res = WidenVecRes_Shift(N); break; + + case 
ISD::ANY_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::ZERO_EXTEND: + case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCOS: + case ISD::FNEG: + case ISD::FSIN: + case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + unsigned Opcode = N->getOpcode(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(N->getOperand(0)); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getNode(Opcode, dl, WidenVT, InOp); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, + &Ops[0], NumConcat)); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + return DAG.getNode(Opcode, dl, WidenVT, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, + InOp, DAG.getIntPtrConstant(0))); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
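What the unrolling amounts to: convert the elements that actually exist one at a time, then pad the widened result with undefined values. A small plain-C++ analogue follows; the int-to-float conversion and the element counts are assumed stand-ins for the real opcode and types.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> In = {1, 2, 3};        // the narrower input vector
  const unsigned WidenNumElts = 4;        // element count of the widened result
  const float Undef = -0.0f;              // stand-in for an UNDEF element

  std::vector<float> Out(WidenNumElts, Undef);
  unsigned MinElts = std::min<unsigned>(In.size(), WidenNumElts);
  for (unsigned i = 0; i < MinElts; ++i)
    Out[i] = static_cast<float>(In[i]);   // one scalar "convert" per element

  for (float V : Out)
    std::printf("%g ", V);
  std::printf("\n");
  return 0;
}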
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + MVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == WidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { + SDValue InOp = N->getOperand(0); + MVT InVT = InOp.getValueType(); + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + case SoftenFloat: + case ExpandInteger: + case ExpandFloat: + case ScalarizeVector: + case SplitVector: + break; + case WidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + if (WidenSize % InSize == 0) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + MVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + MVT InEltVT = InVT.getVectorElementType(); + NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = MVT::getVectorVT(InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. 
+ SmallVector<SDValue, 16> Ops(NewNumElts); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i < NewNumElts; ++i) + Ops[i] = UndefVal; + + SDValue NewVec; + if (InVT.isVector()) + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NewInVT, &Ops[0], NewNumElts); + else + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + NewInVT, &Ops[0], NewNumElts); + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); + } + } + + // This should occur rarely. Lower the bit-convert to a store/load + // from the stack. Create the stack frame object. Make sure it is aligned + // for both the source and destination types. + SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is a load from the stack slot. + return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + // Build a vector with undefined for the new nodes. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); + NewOps.reserve(WidenNumElts); + for (unsigned i = NumElts; i < WidenNumElts; ++i) + NewOps.push_back(DAG.getUNDEF(EltVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { + MVT InVT = N->getOperand(0).getValueType(); + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + + bool InputWidened = false; // Indicates we need to widen the input. + if (getTypeAction(InVT) != WidenVector) { + if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { + // Add undef vectors to widen to correct length. + unsigned NumConcat = WidenVT.getVectorNumElements() / + InVT.getVectorNumElements(); + SDValue UndefVal = DAG.getUNDEF(InVT); + SmallVector<SDValue, 16> Ops(NumConcat); + for (unsigned i=0; i < NumOperands; ++i) + Ops[i] = N->getOperand(i); + for (unsigned i = NumOperands; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat); + } + } else { + InputWidened = true; + if (WidenVT == TLI.getTypeToTransformTo(InVT)) { + // The inputs and the result are widen to the same value. + unsigned i; + for (i=1; i < NumOperands; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + break; + + if (i > NumOperands) + // Everything but the first operand is an UNDEF so just return the + // widened first operand. + return GetWidenedVector(N->getOperand(0)); + + if (NumOperands == 2) { + // Replace concat of two operands with a shuffle. + SmallVector<int, 16> MaskOps(WidenNumElts); + for (unsigned i=0; i < WidenNumElts/2; ++i) { + MaskOps[i] = i; + MaskOps[i+WidenNumElts/2] = i+WidenNumElts; + } + return DAG.getVectorShuffle(WidenVT, dl, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), + &MaskOps[0]); + } + } + } + + // Fall back to use extracts and build vector. 
+ MVT EltVT = WidenVT.getVectorElementType(); + unsigned NumInElts = InVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) { + Ops[i] = UndefVal; + } + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getIntPtrConstant(0)); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + MVT VT = N->getValueType(0); + MVT WidenVT = TLI.getTypeToTransformTo(VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + + MVT InVT = InOp.getValueType(); + + ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + // Check if we can just return the input vector after widening. + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + } + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = VT.getVectorElementType(); + MVT IdxVT = Idx.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + if (CIdx) { + unsigned IdxVal = CIdx->getZExtValue(); + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal+i, IdxVT)); + } else { + Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); + for (i=1; i < NumElts; ++i) { + SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(i, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); + } + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + MVT LdVT = LD->getMemoryVT(); + DebugLoc dl = N->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + const Value *SV = LD->getSrcValue(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) { + // For extension 
loads, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the load and build a vector. + MVT EltVT = WidenVT.getVectorElementType(); + MVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + SVOffset + Offset, LdEltVT, isVolatile, Align); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + } else { + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + unsigned int LdWidth = LdVT.getSizeInBits(); + Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, + Align, isVolatile, LdWidth, WidenVT, dl); + } + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], + LdChain.size()); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
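Before the chain is rewired below, the extending-load path above has issued one narrow load per element at an offset that grows by the element's byte size. A standalone sketch of that offset arithmetic; the element type, count, and memory contents are assumptions for illustration.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Memory image of a <4 x i16> that is loaded and sign-extended to i32 lanes.
  const int16_t Memory[4] = {-1, 2, -3, 4};
  const unsigned NumElts = 4;
  const unsigned Increment = sizeof(int16_t);    // element size in bytes

  int32_t Widened[4];
  unsigned Offset = 0;
  for (unsigned i = 0; i < NumElts; ++i, Offset += Increment) {
    int16_t Elt;
    std::memcpy(&Elt, reinterpret_cast<const char *>(Memory) + Offset,
                sizeof(Elt));                    // one narrow load at base + Offset
    Widened[i] = Elt;                            // extend into the wide element
  }

  for (int32_t V : Widened)
    std::printf("%d ", V);
  std::printf("\n");
  return 0;
}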
+  ReplaceValueWith(SDValue(N, 1), NewChain);
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(),
+                     WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue Cond1 = N->getOperand(0);
+  MVT CondVT = Cond1.getValueType();
+  if (CondVT.isVector()) {
+    MVT CondEltVT = CondVT.getVectorElementType();
+    MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts);
+    if (getTypeAction(CondVT) == WidenVector)
+      Cond1 = GetWidenedVector(Cond1);
+
+    if (Cond1.getValueType() != CondWidenVT)
+      Cond1 = ModifyToType(Cond1, CondWidenVT);
+  }
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(),
+                     InOp1.getValueType(), N->getOperand(0),
+                     N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  MVT WidenVT = TLI.getTypeToTransformTo(VT);
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Adjust mask based on new input vector length.
+  SmallVector<int, 16> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = N->getMaskElt(i);
+    if (Idx < (int)NumElts)
+      NewMask.push_back(Idx);
+    else
+      NewMask.push_back(Idx - NumElts + WidenNumElts);
+  }
+  for (unsigned i = NumElts; i != WidenNumElts; ++i)
+    NewMask.push_back(-1);
+  return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SDValue InOp1 = N->getOperand(0);
+  MVT InVT = InOp1.getValueType();
+  assert(InVT.isVector() && "can not widen non vector type");
+  MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts);
+  InOp1 = GetWidenedVector(InOp1);
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately. If not,
+  // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); + cerr << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + cerr << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); cerr << "\n"; +#endif + assert(0 && "Do not know how to widen this operator's operand!"); + abort(); + + case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + MVT InVT = InOp.getValueType(); + MVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector<SDValue, 16> Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { + MVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + if (InWidenSize % Size == 0 && !VT.isVector()) { + unsigned NewNumElts = InWidenSize / Size; + MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getIntPtrConstant(0)); + } + } + + // Lower the bit-convert to a store/load from the stack. Create the stack + // frame object. Make sure it is aligned for both the source and destination + // types. 
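That store/load fallback is the DAG-level version of type-punning through memory. A minimal plain-C++ picture of the same round trip follows; the concrete types and values are chosen only as an example.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Reinterpret a 2 x 32-bit "vector" as a single 64-bit scalar by going
  // through a memory slot, the same round trip the stack fallback performs.
  uint32_t Vec[2] = {0xdeadbeefu, 0x01234567u};

  alignas(8) unsigned char Slot[8];             // stack temporary, aligned for both types
  std::memcpy(Slot, Vec, sizeof(Slot));         // store with the source type
  uint64_t Scalar;
  std::memcpy(&Scalar, Slot, sizeof(Scalar));   // load with the destination type

  std::printf("0x%016llx\n", (unsigned long long)Scalar);
  return 0;
}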
+ SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + // If the input vector is not legal, it is likely that we will not find a + // legal vector of the same size. Replace the concatenate vector with a + // nasty build vector. + MVT VT = N->getValueType(0); + MVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + + MVT InVT = N->getOperand(0).getValueType(); + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + unsigned NumOperands = N->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + MVT EltVT = InOp.getValueType().getVectorElementType(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + EltVT, InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value but we want only to store the original + // vector type. + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = N->getDebugLoc(); + + MVT StVT = ST->getMemoryVT(); + MVT ValVT = ValOp.getValueType(); + // It must be true that we the widen vector type is bigger than where + // we need to store. + assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + SmallVector<SDValue, 16> StChain; + if (ST->isTruncatingStore()) { + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the store. 
+    MVT StEltVT = StVT.getVectorElementType();
+    MVT ValEltVT = ValVT.getVectorElementType();
+    unsigned Increment = ValEltVT.getSizeInBits() / 8;
+    unsigned NumElts = StVT.getVectorNumElements();
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                        SVOffset, StEltVT,
+                                        isVolatile, Align));
+    unsigned Offset = Increment;
+    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                       BasePtr, DAG.getIntPtrConstant(Offset));
+      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                DAG.getIntPtrConstant(i));
+      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                          SVOffset + Offset, StEltVT,
+                                          isVolatile, MinAlign(Align, Offset)));
+    }
+  }
+  else {
+    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
+    // Store value
+    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
+                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
+  }
+  if (StChain.size() == 1)
+    return StChain[0];
+  else
+    return DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &StChain[0], StChain.size());
+}
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+
+// Utility function to find a vector type and its associated element
+// type from a preferred width; the vector type must be the same size
+// as VecVT.
+// TLI:   Target lowering used to determine legal types.
+// Width: Preferred width to store.
+// VecVT: Vector value type whose size we must match.
+// Returns NewVecVT and NewEltVT - the vector type and its associated
+// element type.
+static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width,
+                                  MVT VecVT,
+                                  MVT& NewEltVT, MVT& NewVecVT) {
+  unsigned EltWidth = Width + 1;
+  if (TLI.isTypeLegal(VecVT)) {
+    // We start with the preferred width, rounded down to a power of 2, and
+    // look for a legal vector type of that width. If there is none, we reduce
+    // the width by another factor of 2. Since the incoming type is legal,
+    // this process terminates: a vector of the smallest loadable type should
+    // always be legal.
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewVecVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  } else {
+    // The incoming vector type is illegal and is the result of widening
+    // a vector to a power of 2. In this case, we will use the preferred
+    // width as long as it is a multiple of the incoming vector length.
+    // The legalization process will eventually make this into a legal type
+    // and remove the illegal bit converts (which would turn into stack
+    // converts if they were allowed to exist).
+    do {
+      assert(EltWidth > 0);
+      EltWidth = 1 << Log2_32(EltWidth - 1);
+      NewEltVT = MVT::getIntegerVT(EltWidth);
+      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
+      NewVecVT = MVT::getVectorVT(NewEltVT, NumElts);
+    } while (!TLI.isTypeLegal(NewEltVT) ||
+             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
+  }
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                                              SDValue Chain,
+                                              SDValue BasePtr,
+                                              const Value *SV,
+                                              int SVOffset,
+                                              unsigned Alignment,
+                                              bool isVolatile,
+                                              unsigned LdWidth,
+                                              MVT ResType,
+                                              DebugLoc dl) {
+  // The strategy assumes that we can efficiently load power-of-two widths.
+  // The routine chops the load into the largest power-of-2 pieces, inserts
+  // each piece into a legal vector, and then bit-converts the result into
+  // the vector type we want. This avoids unnecessary stack converts.
+
+  // TODO: If the LdWidth is legal, the alignment is the same as the LdWidth,
+  // and the load is nonvolatile, we can use a wider load for the value.
+
+  // Find a vector type that we can load from.
+  MVT NewEltVT, NewVecVT;
+  unsigned NewEltVTWidth;
+  FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+  NewEltVTWidth = NewEltVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+
+  // Check if we can load the element with one instruction.
+  if (LdWidth == NewEltVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  }
+
+  unsigned Idx = 1;
+  LdWidth -= NewEltVTWidth;
+  unsigned Offset = 0;
+
+  while (LdWidth > 0) {
+    unsigned Increment = NewEltVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < NewEltVTWidth) {
+      // The type we are currently using is too large; switch to a smaller
+      // power of 2.
+      unsigned oNewEltVTWidth = NewEltVTWidth;
+      FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT);
+      NewEltVTWidth = NewEltVT.getSizeInBits();
+      // Readjust the vector index and the vector value based on the new
+      // load type.
+      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    }
+
+    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+                               SVOffset+Offset, isVolatile,
+                               MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+
+    LdWidth -= NewEltVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            SDValue Chain,
+                                            SDValue BasePtr,
+                                            const Value *SV,
+                                            int SVOffset,
+                                            unsigned Alignment,
+                                            bool isVolatile,
+                                            SDValue ValOp,
+                                            unsigned StWidth,
+                                            DebugLoc dl) {
+  // Break the store into a series of power-of-2 width stores. For any width,
+  // we convert the vector to a vector of the element size that we want to
+  // store. This avoids requiring a stack convert.
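Both GenWidenVectorLoads above and this store routine decompose an odd width into power-of-two pieces, each time taking roughly the largest piece that still fits and advancing the pointer by that many bytes. The arithmetic can be checked in isolation with the standalone sketch below; the 96-bit starting width is an arbitrary assumed example, and the legality constraint on the chosen type is ignored.

#include <cstdio>

// Largest power of two that is <= Width (what 1 << Log2_32(Width) computes).
static unsigned LargestPow2(unsigned Width) {
  unsigned P = 1;
  while (P * 2 <= Width)
    P *= 2;
  return P;
}

int main() {
  unsigned LdWidth = 96;   // bits left to access, e.g. a widened <3 x i32>
  unsigned Offset = 0;     // byte offset of the next partial access

  while (LdWidth > 0) {
    unsigned Piece = LargestPow2(LdWidth);      // 64, then 32
    std::printf("access %u bits at byte offset %u\n", Piece, Offset);
    Offset += Piece / 8;
    LdWidth -= Piece;
  }
  return 0;
}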
+ + // Find a width of the element type we can store with + MVT WidenVT = ValOp.getValueType(); + MVT NewEltVT, NewVecVT; + + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); + + SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(0)); + SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, + isVolatile, Alignment); + StChain.push_back(StOp); + + // Check if we are done + if (StWidth == NewEltVTWidth) { + return; + } + + unsigned Idx = 1; + StWidth -= NewEltVTWidth; + unsigned Offset = 0; + + while (StWidth > 0) { + unsigned Increment = NewEltVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + + if (StWidth < NewEltVTWidth) { + // Our current type we are using is too large, use a smaller size by + // using a smaller power of 2 + unsigned oNewEltVTWidth = NewEltVTWidth; + FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + NewEltVTWidth = NewEltVT.getSizeInBits(); + // Readjust position and vector position based on new load type + Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + } + + EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, + DAG.getIntPtrConstant(Idx++)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + MinAlign(Alignment, Offset))); + StWidth -= NewEltVTWidth; + } +} + +/// Modifies a vector input (widen or narrows) to a vector of NVT. The +/// input vector must have the same element type as NVT. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { + // Note that InOp might have been widened so it might already have + // the right width or it might need be narrowed. + MVT InVT = InOp.getValueType(); + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + DebugLoc dl = InOp.getDebugLoc(); + + // Check if InOp already has the right width. + if (InVT == NVT) + return InOp; + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { + unsigned NumConcat = WidenNumElts / InNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getIntPtrConstant(0)); + + // Fall back to extract and build. 
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + MVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(Idx)); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); +} diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile new file mode 100644 index 000000000000..185222ade98e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -0,0 +1,15 @@ +##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSelectionDAG +PARALLEL_DIRS = +BUILD_ARCHIVE = 1 +DONT_BUILD_RELINKED = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 000000000000..af73b28fae93 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,635 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct VISIBILITY_HIDDEN FastPriorityQueue { + SmallVector<SUnit *, 16> Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". 
These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. 
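/// (Release example: if a predecessor started with NumSuccsLeft == 3 and this
/// is the last of its three successors to be scheduled, ReleasePred drops the
/// count to 0 and pushes the predecessor onto the AvailableQueue.)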
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = NewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
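    // Unfolding example (illustrative, x86-style): a load-folded node such as
    // "t = ADD32rm r, [addr]" is split by unfoldMemoryOperand into
    // NewNodes[0] = "ld = MOV32rm [addr]" and NewNodes[1] = "t = ADD32rr r, ld",
    // letting the load and the arithmetic be scheduled as separate SUnits.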
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
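  // Net effect (illustrative): the value leaves the contested physical
  // register through CopyFromSU (SrcRC -> DestRC) and is copied back by
  // CopyToSU (DestRC -> SrcRC); every already-scheduled successor of SU is
  // rewired below to read from CopyToSU instead of SU.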
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + unsigned Reg = I->getReg(); + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { + if (RegAdded.insert(Reg)) + LRegs.push_back(Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { + if (RegAdded.insert(*Reg)) + LRegs.push_back(*Reg); + } + for (const unsigned *Alias = TRI->getAliasSet(*Reg); + *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) + LRegs.push_back(*Alias); + } + } + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGFast::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. 
+ if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue.push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue.pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue.pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try code duplication or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + if (!CurSU) { + // Try duplicating the nodes that produces these + // "expensive to copy" values to break the dependency. In case even + // that doesn't work, insert cross class copies. + SUnit *TrySU = NotReady[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DOUT << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"; + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DOUT << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"; + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + assert(false && "Unable to resolve live physical register dependencies!"); + abort(); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + + +#ifndef NDEBUG + // Verify that all SUnits were scheduled. 
+ bool AnyNotSched = false; + unsigned DeadNodes = 0; + unsigned Noops = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + cerr << "*** List scheduling failed! ***\n"; + SUnits[i].dump(this); + cerr << "has successors left!\n"; + AnyNotSched = true; + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 000000000000..c4325349990d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,268 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/Statistic.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. 
+ /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue, + ScheduleHazardRecognizer *HR) + : ScheduleDAGSDNodes(mf), + AvailableQueue(availqueue), HazardRec(HR) { + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build the scheduling graph. + BuildSchedGraph(); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. 
+ if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DOUT << "*** Advancing cycle, no work to do\n"; + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DOUT << "*** Emitting noop\n"; + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. 
+ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 000000000000..c97e2a8c86bf --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,1533 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + tdrListrDAGScheduler("list-tdrr", + "Top-down register reduction list scheduling", + createTDRRListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: + /// isBottomUp - This is true if the scheduling problem is bottom-up, false if + /// it is top-down. + bool isBottomUp; + + /// AvailableQueue - The priority queue to use for the available SUnits. + SchedulingPriorityQueue *AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. 
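  /// (For example, WillCreateCycle below consults this ordering before the
  /// artificial edges used to resolve physical-register conflicts are added,
  /// so that an edge is never added when it would make the DAG cyclic.)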
+ ScheduleDAGTopologicalSort Topo; + +public: + ScheduleDAGRRList(MachineFunction &mf, + bool isbottomup, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + AvailableQueue(availqueue), Topo(SUnits) { + } + + ~ScheduleDAGRRList() { + delete AvailableQueue; + } + + void Schedule(); + + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will + /// create a cycle. + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + /// Updates the topological ordering if required. + void AddPred(SUnit *SU, const SDep &D) { + Topo.AddPred(SU, D.getSUnit()); + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + /// Updates the topological ordering if required. + void RemovePred(SUnit *SU, const SDep &D) { + Topo.RemovePred(SU, D.getSUnit()); + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, const SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void CapturePred(SDep *PredEdge); + void ScheduleNodeBottomUp(SUnit*, unsigned); + void ScheduleNodeTopDown(SUnit*, unsigned); + void UnscheduleNodeBottomUp(SUnit*); + void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector<SUnit*, 2>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + void ListScheduleTopDown(); + void ListScheduleBottomUp(); + + + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. + /// Updates the topological ordering if required. + SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = NewSUnit(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + /// Updates the topological ordering if required. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// ForceUnitLatencies - Return true, since register-pressure-reducing + /// scheduling doesn't need actual latency information. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 
+ if (isBottomUp) + ListScheduleBottomUp(); + else + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + --PredSU->NumSuccsLeft; + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue->push(PredSU); + } +} + +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// CapturePred - This does the opposite of ReleasePred. Since SU is being +/// unscheduled, incrcease the succ left count of its predecessors. Remove +/// them from AvailableQueue if necessary. +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + if (PredSU->isAvailable) { + PredSU->isAvailable = false; + if (!PredSU->isPending) + AvailableQueue->remove(PredSU); + } + + ++PredSU->NumSuccsLeft; +} + +/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and +/// its predecessor states to reflect the change. 
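/// (This is the inverse of ScheduleNodeBottomUp: CapturePred re-increments
/// NumSuccsLeft on each predecessor, the live-register bookkeeping for
/// assigned physical-register dependencies is restored, and SU itself is
/// pushed back onto the AvailableQueue.)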
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { + DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(SU->dump(this)); + + AvailableQueue->UnscheduledNode(SU); + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + CapturePred(&*I); + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) { + LiveRegDefs[I->getReg()] = SU; + ++NumLiveRegs; + } + if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) + LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + } + } + + SU->setHeightDirty(); + SU->isScheduled = false; + SU->isAvailable = true; + AvailableQueue->push(SU); +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, + unsigned &CurCycle) { + SUnit *OldSU = NULL; + while (CurCycle > BtCycle) { + OldSU = Sequence.back(); + Sequence.pop_back(); + if (SU->isSucc(OldSU)) + // Don't try to remove SU from AvailableQueue. + SU->isAvailable = false; + UnscheduleNodeBottomUp(OldSU); + --CurCycle; + } + + assert(!SU->isSucc(OldSU) && "Something is wrong!"); + + ++NumBacktracks; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getFlaggedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + MVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Flag) + return NULL; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector<SUnit*, 2> &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. 
+/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector<unsigned, 4> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = (Flags & 0xffff) >> 3; + + ++i; // Skip the ID value. + if ((Flags & 7) == 2 || (Flags & 7) == 6) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + SUnit *TrySU = NotReady[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
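        // Illustrative scenario (hypothetical cycles): if the register
        // blocking TrySU was made live at cycle 3 and CurCycle is 7, the most
        // recently scheduled nodes are unscheduled one by one until the cycle
        // of the conflicting definition is reached, freeing TrySU to be
        // scheduled without clobbering the live register.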
+ unsigned LiveCycle = CurCycle; + for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { + unsigned Reg = LRegs[j]; + unsigned LCycle = LiveRegCycles[Reg]; + LiveCycle = std::min(LiveCycle, LCycle); + } + SUnit *OldSU = Sequence[LiveCycle]; + if (!WillCreateCycle(TrySU, OldSU)) { + BacktrackBottomUp(TrySU, LiveCycle, CurCycle); + // Force the current node to be scheduled before the node that + // requires the physical reg dep. + if (OldSU->isAvailable) { + OldSU->isAvailable = false; + AvailableQueue->remove(OldSU); + } + AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + // If one or more successors has been unscheduled, then the current + // node is no longer avaialable. Schedule a successor that's now + // available instead. + if (!TrySU->isAvailable) + CurSU = AvailableQueue->pop(); + else { + CurSU = TrySU; + TrySU->isPending = false; + NotReady.erase(NotReady.begin()+i); + } + break; + } + } + + if (!CurSU) { + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. + SUnit *TrySU = NotReady[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DOUT << "Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"; + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DOUT << "Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"; + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue->push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order if it is bottom up. 
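  // (Bottom-up scheduling builds Sequence starting from the DAG root, i.e.
  // from the last instruction backwards, so it must be reversed to obtain
  // program order.)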
+ std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + --SuccSU->NumPredsLeft; + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft < 0) { + cerr << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + cerr << " has been released too many times!\n"; + assert(0); + } +#endif + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + +void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-tdrr scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGRRList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + SUnit *CurSU = AvailableQueue->pop(); + + if (CurSU) + ScheduleNodeTopDown(CurSU, CurCycle); + ++CurCycle; + } + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { + template<class SF> + class RegReductionPriorityQueue; + + /// Sorting functions for the Available queue. 
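  /// (These comparators rank nodes primarily by their Sethi-Ullman numbers,
  /// computed by CalcNodeSethiUllmanNumber below: a node whose non-chain
  /// predecessors have numbers 2 and 2 gets 2 + 1 = 3, while one whose
  /// predecessors have 3 and 1 simply inherits 3. Ties fall through to
  /// secondary heuristics such as closestSucc and calcMaxScratches.)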
+ struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; + bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; + td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +namespace { + template<class SF> + class VISIBILITY_HIDDEN RegReductionPriorityQueue + : public SchedulingPriorityQueue { + PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; + unsigned currentQueueId; + + protected: + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + public: + RegReductionPriorityQueue(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) : + Queue(SF(this)), currentQueueId(0), + TII(tii), TRI(tri), scheduleDAG(NULL) {} + + void initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + } + + void addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); + } + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + } + + unsigned getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return 0;
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+ }
+
+ unsigned size() const { return Queue.size(); }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++currentQueueId;
+ Queue.push(U);
+ }
+
+ void push_all(const std::vector<SUnit *> &Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
+ push(Nodes[i]);
+ }
+
+ SUnit *pop() {
+ if (empty()) return NULL;
+ SUnit *V = Queue.top();
+ Queue.pop();
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ Queue.erase_one(SU);
+ SU->NodeQueueId = 0;
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+ };
+
+ typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+ BURegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<td_ls_rr_sort>
+ TDRegReductionPriorityQueue;
+}
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain succs
+ unsigned Height = I->getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(I->getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready. 
+ // t2 = op c3 + // t4 = op c4 + // + // Then schedule t2 = op first. + // i.e. + // t4 = op c4 + // t2 = op c3 + // t1 = op t2, c1 + // t3 = op t4, c2 + // + // This creates more short live intervals. + unsigned LDist = closestSucc(left); + unsigned RDist = closestSucc(right); + if (LDist != RDist) + return LDist < RDist; + + // How many registers becomes live when the node is scheduled. + unsigned LScratch = calcMaxScratches(left); + unsigned RScratch = calcMaxScratches(right); + if (LScratch != RScratch) + return LScratch > RScratch; + + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +template<class SF> +bool +RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { + if (SU->isTwoAddress) { + unsigned Opc = SU->getNode()->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned i = 0; i != NumOps; ++i) { + if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + SDNode *DU = SU->getNode()->getOperand(i).getNode(); + if (DU->getNodeId() != -1 && + Op->OrigNode == &(*SUnits)[DU->getNodeId()]) + return true; + } + } + } + return false; +} + + +/// hasCopyToRegUse - Return true if SU has a value successor that is a +/// CopyToReg node. +static bool hasCopyToRegUse(const SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) + return true; + } + return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. +static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SDNode *N = SuccSU->getNode(); + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + assert(ImpDefs && "Caller should check hasPhysRegDefs"); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getFlaggedNode()) { + if (!SUNode->isMachineOpcode()) + continue; + const unsigned *SUImpDefs = + TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + if (!SUImpDefs) + return false; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (TRI->regsOverlap(Reg, SUReg)) + return true; + } + } + } + return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +/// N +/// / | +/// / | +/// U store +/// | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). 
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU->NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU->NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = 0;
+ for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+ EE = SU->Preds.end(); II != EE; ++II)
+ if (!II->isCtrl()) {
+ PredSU = II->getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU->getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ SUnit *PredSuccSU = II->getSUnit();
+ if (PredSuccSU == SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph. 
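+ // Reroute each of PredSU's other successors so it depends on SU instead;
+ // SU keeps its own dependence on PredSU, placing it between PredSU and
+ // those users.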
+ DOUT << "Prescheduling SU # " << SU->NodeNum + << " next to PredSU # " << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"; + for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { + SDep Edge = PredSU->Succs[i]; + assert(!Edge.isAssignedRegDep()); + SUnit *SuccSU = Edge.getSUnit(); + if (SuccSU != SU) { + Edge.setSUnit(PredSU); + scheduleDAG->RemovePred(SuccSU, Edge); + scheduleDAG->AddPred(SU, Edge); + Edge.setSUnit(SU); + scheduleDAG->AddPred(SuccSU, Edge); + --i; + } + } + outer_loop_continue:; + } +} + +/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses +/// it as a def&use operand. Add a pseudo control edge from it to the other +/// node (if it won't create a cycle) so the two-address one will be scheduled +/// first (lower in the schedule). If both nodes are two-address, favor the +/// one that has a CopyToReg use (more likely to be a loop induction update). +/// If both are two-address, but one is commutable while the other is not +/// commutable, favor the one that's not commutable. +template<class SF> +void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + if (!SU->isTwoAddress) + continue; + + SDNode *Node = SU->getNode(); + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) + continue; + + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned j = 0; j != NumOps; ++j) { + if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + continue; + SDNode *DU = SU->getNode()->getOperand(j).getNode(); + if (DU->getNodeId() == -1) + continue; + const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; + if (!DUSU) continue; + for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), + E = DUSU->Succs.end(); I != E; ++I) { + if (I->isCtrl()) continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU == SU) + continue; + // Be conservative. Ignore if nodes aren't at roughly the same + // depth and height. + if (SuccSU->getHeight() < SU->getHeight() && + (SU->getHeight() - SuccSU->getHeight()) > 1) + continue; + // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge + // constrains whatever is using the copy, instead of the copy + // itself. In the case that the copy is coalesced, this + // preserves the intent of the pseudo two-address heurietics. + while (SuccSU->Succs.size() == 1 && + SuccSU->getNode()->isMachineOpcode() && + SuccSU->getNode()->getMachineOpcode() == + TargetInstrInfo::COPY_TO_REGCLASS) + SuccSU = SuccSU->Succs.front().getSUnit(); + // Don't constrain non-instruction nodes. + if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can clobber them. + if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + continue; + } + // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; + // these may be coalesced away. We want them close to their uses. 
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || + SuccOpc == TargetInstrInfo::INSERT_SUBREG || + SuccOpc == TargetInstrInfo::SUBREG_TO_REG) + continue; + if ((!canClobber(SuccSU, DUSU) || + (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum + << " to SU #" << SuccSU->NodeNum << "\n"; + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } + } + } + } +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +template<class SF> +void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} + +/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled +/// predecessors of the successors of the SUnit SU. Stop when the provided +/// limit is exceeded. +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, true, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = + new ScheduleDAGRRList(*IS->MF, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 000000000000..7aa15bcc6862 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,294 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf) { +} + +/// Run - perform scheduling. 
+/// +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos) { + DAG = dag; + ScheduleDAG::Run(bb, insertPos); +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = NewSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } + } +} + +void ScheduleDAGSDNodes::BuildSchedUnits() { + // During scheduling, the NodeId field of SDNode is used to map SDNodes + // to their associated SUnits by holding SUnits table indices. A value + // of -1 means the SDNode does not yet have an associated SUnit. + unsigned NumNodes = 0; + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + NI->setNodeId(-1); + ++NumNodes; + } + + // Reserve entries in the vector for each of the SUnits we are creating. This + // ensure that reallocation of the vector won't happen, so SUnit*'s won't get + // invalidated. + // FIXME: Multiply by 2 because we may clone nodes during scheduling. + // This is a temporary workaround. + SUnits.reserve(NumNodes * 2); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. + continue; + + // If this node has already been processed, stop now. + if (NI->getNodeId() != -1) continue; + + SUnit *NodeSUnit = NewSUnit(NI); + + // See if anything is flagged to this node, if so, add them to flagged + // nodes. Nodes can have at most one flag input and one flag output. Flags + // are required to be the last operand and result of a node. + + // Scan up to find flagged preds. + SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N = N->getOperand(N->getNumOperands()-1).getNode(); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + } + + // Scan down to find any flagged succs. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { + SDValue FlagVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Flag result. 
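+ // If such a use exists, absorb it into this SUnit and continue walking
+ // down the flag chain from the user.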
+ bool HasFlagUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (FlagVal.isOperandOf(*UI)) { + HasFlagUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + break; + } + if (!HasFlagUse) break; + } + + // If there are flag operands involved, N is now the bottom-most node + // of the sequence of nodes that are flagged together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Assign the Latency field of NodeSUnit using target-provided information. + if (UnitLatencies) + NodeSUnit->Latency = 1; + else + ComputeLatency(NodeSUnit); + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + MVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0) + PhysReg = 0; + SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg)); + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// flagged together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph() { + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. 
+ AddSchedEdges(); +} + +void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + SU->Latency = 0; + bool SawMachineOpcode = false; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + if (N->isMachineOpcode()) { + SawMachineOpcode = true; + SU->Latency += + InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + } +} + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by special operands that describe memory references, then an +/// optional chain operand, then an optional flag operand. Compute the number +/// of actual operands that will go into the resulting MachineInstr. +unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { + unsigned N = ComputeMemOperandsEnd(Node); + while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode())) + --N; // Ignore MEMOPERAND nodes + return N; +} + +/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode +/// operand +unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + cerr << "\n"; + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + cerr << " "; + FlaggedNodes.back()->dump(DAG); + cerr << "\n"; + FlaggedNodes.pop_back(); + } +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 000000000000..2a278b749a8c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,179 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. 
+ /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + SelectionDAG *DAG; // DAG of the current basic block + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + virtual ~ScheduleDAGSDNodes() {} + + /// Run - perform scheduling. + /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. + /// + static bool isPassiveNode(SDNode *Node) { + if (isa<ConstantSDNode>(Node)) return true; + if (isa<ConstantFPSDNode>(Node)) return true; + if (isa<RegisterSDNode>(Node)) return true; + if (isa<GlobalAddressSDNode>(Node)) return true; + if (isa<BasicBlockSDNode>(Node)) return true; + if (isa<FrameIndexSDNode>(Node)) return true; + if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<JumpTableSDNode>(Node)) return true; + if (isa<ExternalSymbolSDNode>(Node)) return true; + if (isa<MemOperandSDNode>(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = 0; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + return &SUnits.back(); + } + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + virtual void BuildSchedGraph(); + + /// ComputeLatency - Compute node latency. + /// + virtual void ComputeLatency(SUnit *SU); + + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// CountOperands - The inputs to target nodes have any actual inputs first, + /// followed by special operands that describe memory references, then an + /// optional chain operand, then flag operands. Compute the number of + /// actual operands that will go into the resulting MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// ComputeMemOperandsEnd - Find the index one past the last + /// MemOperandSDNode operand + static unsigned ComputeMemOperandsEnd(SDNode *Node); + + /// EmitNode - Generate machine code for an node and needed dependencies. 
+ /// VRBaseMap contains, for each already emitted node, the first virtual + /// register number for the results of the node. + /// + void EmitNode(SDNode *Node, bool IsClone, bool HasClone, + DenseMap<SDValue, unsigned> &VRBaseMap); + + virtual MachineBasicBlock *EmitSchedule(); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + + private: + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS + /// nodes. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; + + void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + bool IsCloned, unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, bool IsClone, + bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. + void BuildSchedUnits(); + void AddSchedEdges(); + }; +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp new file mode 100644 index 000000000000..fb5e207e81bb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -0,0 +1,668 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = + getInstrOperandRegClass(TRI, II, i+II.getNumDefs()); + if (!UseRC) + UseRC = RC; + else if (RC) { + if (UseRC->hasSuperClass(RC)) + UseRC = RC; + else + assert((UseRC == RC || RC->hasSuperClass(UseRC)) && + "Multiple uses expecting different register classes!"); + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + MVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI.getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. 
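+ // The new virtual register takes the destination class computed above;
+ // copyRegToReg is expected to find a suitable copy instruction for the
+ // class pair (the assert below catches failure).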
+ VRBase = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. +unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, II, i); + + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI.createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned ScheduleDAGSDNodes::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
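+ // With no CopyToReg use to supply a destination register, fall back to
+ // the register class the target associates with the value type.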
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI.createVirtualRegister(RC); + } + BuildMI(BB, Op.getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF),VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +ScheduleDAGSDNodes::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = + MRI.getRegClass(VReg); + const TargetRegisterClass *DstRC = + getInstrOperandRegClass(TRI, *II, IIOpNum); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. 
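+/// Operands that are not virtual-register values (immediates, FP constants,
+/// explicit registers, globals, basic blocks, frame indices, jump tables,
+/// constant-pool entries, and external symbols) are translated directly into
+/// the corresponding MachineOperand kinds.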
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getZExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset())); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex())); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM.getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM.getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + if (CP->isMachineConstantPoolEntry()) + Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset)); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. +static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, MVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. 
+ for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI.getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI.getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI.createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + BB->insert(InsertPos, MI); + } else + assert(0 && "Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. 
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+ScheduleDAGSDNodes::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(VReg);
+
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ bool Emitted = TII->copyRegToReg(*BB, InsertPos, NewVReg, VReg,
+ DstRC, SrcRC);
+ assert(Emitted &&
+ "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
+ (void) Emitted;
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ isNew = isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ // If machine instruction
+ if (Node->isMachineOpcode()) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG ||
+ Opc == TargetInstrInfo::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ unsigned MemOperandsEnd = ComputeMemOperandsEnd(Node);
+ bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+ II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ assert((II.getNumOperands() == NumMIOperands ||
+ HasPhysRegOuts || II.isVariadic()) &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ for (unsigned i = 0; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i+II.getNumDefs(), &II, VRBaseMap);
+
+ // Emit all of the memory operands of this instruction
+ for (unsigned i = NodeOperands; i != MemOperandsEnd; ++i)
+ AddMemOperand(MI, cast<MemOperandSDNode>(Node->getOperand(i))->MO);
+
+ if (II.usesCustomDAGSchedInsertionHook()) {
+ // Insert this instruction into the basic block using a target
+ // specific inserter which may return a new basic block.
+ BB = TLI->EmitInstrWithCustomInserter(MI, BB);
+ InsertPos = BB->end();
+ } else {
+ BB->insert(InsertPos, MI);
+ }
+
+ // Additional results must be physical register defs. 
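+ // For each implicit-def result that is actually used, EmitCopyFromReg
+ // either copies the value into a virtual register or records the physical
+ // register directly when copying it would be too expensive.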
+ if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(DAG); +#endif + assert(0 && "This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + assert(0 && "EntryToken should have been excluded from the schedule!"); + break; + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI.getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI.getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*BB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: assert(0 && "Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, 0, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + BB->insert(InsertPos, MI); + break; + } + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. 
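+/// Null entries in the schedule become no-ops, SUnits without an SDNode
+/// become physical register copies, and for all other SUnits the flagged
+/// (glued) nodes are emitted before the SUnit's own node.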
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,VRBaseMap); + FlaggedNodes.pop_back(); + } + EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); + } + + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 000000000000..195896ee89dc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,5743 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. 
+static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { + switch (VT.getSimpleVT()) { + default: assert(0 && "Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + +SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(MVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // PPC long double cannot be converted to any other type. + if (VT == MVT::ppcf128 || + &Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue NotZero = N->getOperand(i); + if (isa<ConstantSDNode>(NotZero)) { + if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + return false; + } else if (isa<ConstantFPSDNode>(NotZero)) { + if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). + bitcastToAPInt().isAllOnesValue()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BIT_CONVERT) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. 
+ while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue Zero = N->getOperand(i); + if (isa<ConstantSDNode>(Zero)) { + if (!cast<ConstantSDNode>(Zero)->isNullValue()) + return false; + } else if (isa<ConstantFPSDNode>(Zero)) { + if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != Zero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +/// isScalarToVector - Return true if the specified node is a +/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low +/// element is not an undef. +bool ISD::isScalarToVector(const SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + if (N->getOperand(0).getOpcode() == ISD::UNDEF) + return false; + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 1; i < NumElems; ++i) { + SDValue V = N->getOperand(i); + if (V.getOpcode() != ISD::UNDEF) + return false; + } + return true; +} + + +/// isDebugLabel - Return true if the specified node represents a debug +/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node). +bool ISD::isDebugLabel(const SDNode *N) { + SDValue Zero; + if (N->getOpcode() == ISD::DBG_LABEL) + return true; + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL) + return true; + return false; +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +/// getSetCCInverse - Return the operation corresponding to !(X op Y), where +/// 'op' is a valid SetCC operation. +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + + +/// isSignedOp - For an integer comparison, return 1 if the comparison is a +/// signed operation and 2 if the result is an unsigned comparison. Return zero +/// if the operation does not depend on the sign of the input (setne and seteq). +static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: assert(0 && "Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +/// getSetCCOrOperation - Return the result of a logical OR between different +/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). 
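+/// (CondCode values are bit masks over the E, G, L, U and N bits, so OR-ing
+/// two codes yields a code that tests the union of their relations.)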
This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +const TargetMachine &SelectionDAG::getTarget() const { + return MF->getTarget(); +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. 
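+/// Including this out-of-band payload (constant values, global addresses,
+/// memory operands, shuffle masks, and so on) is what lets such nodes be
+/// CSE'd correctly.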
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + default: break; // Normal nodes don't need extra info. + case ISD::ARG_FLAGS: + ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits()); + break; + case ISD::TargetConstant: + case ISD::Constant: + ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + break; + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::DBG_STOPPOINT: { + const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N); + ID.AddInteger(DSP->getLine()); + ID.AddInteger(DSP->getColumn()); + ID.AddPointer(DSP->getCompileUnit()); + break; + } + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::MEMOPERAND: { + const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO; + MO.Profile(ID); + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->AddSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + break; + } + case ISD::CALL: { + const CallSDNode *Call = cast<CallSDNode>(N); + ID.AddInteger(Call->getCallingConv()); + ID.AddInteger(Call->isVarArg()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: { + const AtomicSDNode *AT = cast<AtomicSDNode>(N); + ID.AddInteger(AT->getMemoryVT().getRawBits()); + ID.AddInteger(AT->getRawSubclassData()); + break; + } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } + } // end switch (N->getOpcode()) +} + +/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID +/// data. +static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { + AddNodeIDOpcode(ID, N->getOpcode()); + // Add the return value info. + AddNodeIDValueTypes(ID, N->getVTList()); + // Add the operand info. 
+ AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands()); + + // Handle SDNode leafs with special info. + AddNodeIDCustom(ID, N); +} + +/// encodeMemSDNodeFlags - Generic routine for computing a value for use in +/// the CSE map that carries alignment, volatility, indexing mode, and +/// extension/truncation information. +/// +static inline unsigned +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, + bool isVolatile, unsigned Alignment) { + assert((ConvType & 3) == ConvType && + "ConvType may not require more than 2 bits!"); + assert((AM & 7) == AM && + "AM may not require more than 3 bits!"); + return ConvType | + (AM << 2) | + (isVolatile << 5) | + ((Log2_32(Alignment) + 1) << 6); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// doNotCSE - Return true if CSE should not be performed for this node. +static bool doNotCSE(SDNode *N) { + if (N->getValueType(0) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + switch (N->getOpcode()) { + default: break; + case ISD::HANDLENODE: + case ISD::DBG_LABEL: + case ISD::DBG_STOPPOINT: + case ISD::EH_LABEL: + case ISD::DECLARE: + return true; // Never CSE these nodes. + } + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Flag) + return true; // Never CSE anything that produces a flag. + + return false; +} + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector<SDNode*, 128> DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) + if (I->use_empty()) + DeadNodes.push_back(I); + + RemoveDeadNodes(DeadNodes); + + // If the root changed (e.g. it was a dead load, update the root). + setRoot(Dummy.getValue()); +} + +/// RemoveDeadNodes - This method deletes the unreachable nodes in the +/// given list, and any nodes that become unreachable as a result. +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, + DAGUpdateListener *UpdateListener) { + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.pop_back_val(); + + if (UpdateListener) + UpdateListener->NodeDeleted(N, 0); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ + SmallVector<SDNode*, 16> DeadNodes(1, N); + RemoveDeadNodes(DeadNodes, UpdateListener); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. 
+ RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. + N->DropOperands(); + + DeallocateNode(N); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::EntryToken: + assert(0 && "EntryToken should not be in CSEMaps!"); + return false; + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: + Erased = + TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::VALUETYPE: { + MVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; + ValueTypeNodes[VT.getSimpleVT()] = 0; + } + break; + } + default: + // Remove it from the CSE Map. + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + cerr << "\n"; + assert(0 && "Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, + DAGUpdateListener *UpdateListener) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing, UpdateListener); + + // N is now dead. Inform the listener if it exists and delete it. 
+ if (UpdateListener) + UpdateListener->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform a listener if + // it exists. + if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); +} + +/// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
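+/// Currently only BUILD_PAIR and BUILD_VECTOR nodes carry checks; all other
+/// opcodes are accepted as-is.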
+void SelectionDAG::VerifyNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + MVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + MVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// getMVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getMVTAlignment(MVT VT) const { + const Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::Int8Ty, 0) : + VT.getTypeForMVT(); + + return TLI.getTargetData()->getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) + : TLI(tli), FLI(fli), DW(0), + EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), + getVTList(MVT::Other)), Root(getEntryNode()) { + AllNodes.push_back(&EntryNode); +} + +void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, + DwarfWriter *dw) { + MF = &mf; + MMI = mmi; + DW = dw; +} + +SelectionDAG::~SelectionDAG() { + allnodes_clear(); +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(AllNodes.begin()); +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(0)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(0)); + + EntryNode.UseList = 0; + AllNodes.push_back(&EntryNode); + Root = getEntryNode(); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { + if (Op.getValueType() == VT) return Op; + APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { + MVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { + return getConstant(*ConstantInt::get(Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + assert(Val.getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantSDNode>(); + new (N) ConstantSDNode(isT, &Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + if (!N) { + N = NodeAllocator.Allocate<ConstantFPSDNode>(); + new (N) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), + VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { + MVT EltVT = + VT.isVector() ? VT.getVectorElementType() : VT; + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else + return getConstantFP(APFloat(Val), VT, isTarget); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, + MVT VT, int64_t Offset, + bool isTargetGA) { + unsigned Opc; + + // Truncate (with sign-extension) the offset value to the pointer size. 
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + if (BitWidth < 64) + Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + } + + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); + new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); + new (N) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){ + unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); + new (N) JumpTableSDNode(JTI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); + new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, + unsigned Alignment, int Offset, + bool isTarget) { + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->AddSelectionDAGCSEId(ID); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); + new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>(); + new (N) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0); + ID.AddInteger(Flags.getRawBits()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>(); + new (N) ARG_FLAGSSDNode(Flags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(MVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT()+1); + + SDNode *&N = VT.isExtended() ? + ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()]; + + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<VTSDNode>(); + new (N) VTSDNode(VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); + new (N) ExternalSymbolSDNode(false, Sym, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) { + SDNode *&N = TargetExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); + new (N) ExternalSymbolSDNode(true, Sym, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (CondCodeNodes[Cond] == 0) { + CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>(); + new (N) CondCodeSDNode(Cond); + CondCodeNodes[Cond] = N; + AllNodes.push_back(N); + } + return SDValue(CondCodeNodes[Cond], 0); +} + +// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in +// the shuffle mask M that point at N1 to point at N2, and indices that point +// N2 to point at N1. 
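+// For example, with 4-element inputs the mask <0,5,2,7> becomes <4,1,6,3>
+// once the operands are swapped.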
+static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { + std::swap(N1, N2); + int NElts = M.size(); + for (int i = 0; i != NElts; ++i) { + if (M[i] >= NElts) + M[i] -= NElts; + else if (M[i] >= 0) + M[i] += NElts; + } +} + +SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, + SDValue N2, const int *Mask) { + assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); + assert(VT.isVector() && N1.getValueType().isVector() && + "Vector Shuffle VTs must be a vectors"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() + && "Vector Shuffle VTs must have same element type"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + return N1; + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + unsigned NElts = VT.getVectorNumElements(); + SmallVector<int, 8> MaskVec; + for (unsigned i = 0; i != NElts; ++i) { + assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); + MaskVec.push_back(Mask[i]); + } + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (unsigned i = 0; i != NElts; ++i) + if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N1.getOpcode() == ISD::UNDEF) + commuteShuffle(N1, N2, MaskVec); + + // Canonicalize all index into lhs, -> shuffle lhs, undef + // Canonicalize all index into rhs, -> shuffle rhs, undef + bool AllLHS = true, AllRHS = true; + bool N2Undef = N2.getOpcode() == ISD::UNDEF; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= (int)NElts) { + if (N2Undef) + MaskVec[i] = -1; + else + AllLHS = false; + } else if (MaskVec[i] >= 0) { + AllRHS = false; + } + } + if (AllLHS && AllRHS) + return getUNDEF(VT); + if (AllLHS && !N2Undef) + N2 = getUNDEF(VT); + if (AllRHS) { + N1 = getUNDEF(VT); + commuteShuffle(N1, N2, MaskVec); + } + + // If Identity shuffle, or all shuffle in to undef, return that node. + bool AllUndef = true; + bool Identity = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. + int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>(); + new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. 
+ if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<RegisterSDNode>(); + new (N) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, + unsigned Line, unsigned Col, + Value *CU) { + SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>(); + new (N) DbgStopPointSDNode(Root, Line, Col, CU); + N->setDebugLoc(DL); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, + SDValue Root, + unsigned LabelID) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); + ID.AddInteger(LabelID); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<LabelSDNode>(); + new (N) LabelSDNode(Opcode, dl, Root, LabelID); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || isa<PointerType>(V->getType())) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>(); + new (N) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) { +#ifndef NDEBUG + const Value *v = MO.getValue(); + assert((!v || isa<PointerType>(v->getType())) && + "SrcValue is not a pointer?"); +#endif + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0); + MO.Profile(ID); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>(); + new (N) MemOperandSDNode(MO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. +SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { + MVT OpTy = Op.getValueType(); + MVT ShTy = TLI.getShiftAmountTy(); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. 
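+/// The slot is sized from the type's store size and aligned to the larger of
+/// minAlign and the type's preferred alignment.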
+SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSizeInBits()/8; + const Type *Ty = VT.getTypeForMVT(); + unsigned StackAlign = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + const Type *Ty1 = VT1.getTypeForMVT(); + const Type *Ty2 = VT2.getTypeForMVT(); + const TargetData *TD = TLI.getTargetData(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return getConstant(1, VT); + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, VT); + case ISD::SETNE: return getConstant(C1 != C2, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); + case ISD::SETULE: return getConstant(C1.ule(C2), VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), VT); + case ISD::SETLT: return getConstant(C1.slt(C2), VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), VT); + case ISD::SETLE: return getConstant(C1.sle(C2), VT); + case ISD::SETGE: return getConstant(C1.sge(C2), VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + // No compile time operations on this type yet. 
+ if (N1C->getValueType(0) == MVT::ppcf128) + return SDValue(); + + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + unsigned BitWidth = Op.getValueSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + unsigned Depth) const { + unsigned BitWidth = Mask.getBitWidth(); + assert(BitWidth == Op.getValueType().getSizeInBits() && + "Mask size mismatches value type size!"); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. 
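+ // Recurse into the operands of the opcodes handled below, combining the
+ // per-operand known bits according to each operation's semantics.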
+ + APInt KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, + KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case ISD::MUL: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clear(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. 
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(Op.getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case ISD::SELECT: + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SELECT_CC: + ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + return; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + return; + + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. 
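The SHL and SRL cases above shift the known-bit sets by the constant amount and then mark the vacated positions as known zero (SRA, continued below, additionally propagates a known sign bit into the vacated high positions). The SHL/SRL rules on uint32_t, KnownBits32 as in the earlier sketch, assuming 0 < Amt < 32:

    KnownBits32 knownShl(KnownBits32 K, unsigned Amt) {
      // Vacated low bits are zero.
      return { (K.Zero << Amt) | ((1u << Amt) - 1), K.One << Amt };
    }
    KnownBits32 knownLshr(KnownBits32 K, unsigned Amt) {
      // Vacated high bits are zero.
      return { (K.Zero >> Amt) | ~(~0u >> Amt), K.One >> Amt };
    }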
+ if (ShAmt >= BitWidth) + return; + + APInt InDemandedMask = (Mask << ShAmt); + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + if (HighBits.getBoolValue()) + InDemandedMask |= APInt::getSignBit(BitWidth); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clear(); + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + return; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InSignBit = APInt::getSignBit(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; + APInt InMask = Mask; + InMask.trunc(InBits); + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. Temporarily set this bit in the mask for our callee. 
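Two of the cases above give particularly cheap facts: a zero-extending load and a ZERO_EXTEND leave every bit above the source width known zero, and the bit-counting nodes (CTTZ/CTLZ/CTPOP) can never exceed the bit width, so only their low Log2_32(BitWidth)+1 bits can ever be set. A small plain-C++ check of those two masks (the widths are example values, not anything the code fixes):

    #include <cassert>
    #include <cstdint>

    void checkCheapKnownZeroFacts() {
      // i8 zero-extended into i32: bits 8..31 are known zero.
      uint8_t Byte = 0xC3;
      assert((static_cast<uint32_t>(Byte) & (~0u << 8)) == 0);

      // CTPOP of an i32 is at most 32, which fits in 6 bits, so bits 6..31
      // of the result are known zero.
      assert((32u & (~0u << 6)) == 0);
    }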
+ if (NewBits.getBoolValue()) + InMask |= InSignBit; + + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + + // Note if the sign bit is known to be zero or one. + bool SignBitKnownZero = KnownZero.isNegative(); + bool SignBitKnownOne = KnownOne.isNegative(); + assert(!(SignBitKnownZero && SignBitKnownOne) && + "Sign bit can't be known to be both zero and one!"); + + // If the sign bit wasn't actually demanded by our caller, we don't + // want it set in the KnownZero and KnownOne result values. Reset the + // mask and reapply it to the result values. + InMask = Mask; + InMask.trunc(InBits); + KnownZero &= InMask; + KnownOne &= InMask; + + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero or one, the top bits match. + if (SignBitKnownZero) + KnownZero |= NewBits; + else if (SignBitKnownOne) + KnownOne |= NewBits; + return; + } + case ISD::ANY_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = Mask; + InMask.trunc(InBits); + KnownZero.trunc(InBits); + KnownOne.trunc(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + return; + } + case ISD::TRUNCATE: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = Mask; + InMask.zext(InBits); + KnownZero.zext(InBits); + KnownOne.zext(InBits); + ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= (~InMask) & Mask; + return; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case ISD::ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. 
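The SUB case above rests on a range argument: for C - X with C non-negative, if X is known small enough that no borrow can reach the high bits, the result keeps the leading zeros of C. A concrete instance of that reasoning, checked exhaustively for C = 20 and X < 16:

    #include <cassert>
    #include <cstdint>

    void checkSubLeadingZeros() {
      // 20 - X with 0 <= X < 16 stays in [5, 20], so bits 5..31 are zero,
      // i.e. the result keeps the 27 leading zero bits of the constant 20.
      for (uint32_t X = 0; X < 16; ++X)
        assert(((20u - X) & (~0u << 5)) == 0);
    }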
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + + // If the sign bit of the first operand is zero, the sign bit of + // the result is zero. If the first operand has no one bits below + // the second operand's single 1 bit, its sign will be zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + + KnownZero |= KnownZero2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + MVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. 
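The ADD rule above (reached by the fall-through from SUB) keeps as many low zero bits as both addends share; the comment's own example, 8 + (X << 3), checked directly in plain C++:

    #include <cassert>
    #include <cstdint>

    void checkAddTrailingZeros() {
      // Both 8 and (X << 3) have their low 3 bits clear, so the sum does too.
      for (uint32_t X = 0; X < 1024; ++X)
        assert(((8u + (X << 3)) & 0x7u) == 0);
    }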
+ + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + // If negative, return # leading ones. + if (Val.isNegative()) + return Val.countLeadingOnes(); + + // Return # leading zeros. + return Val.countLeadingZeros(); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getBooleanContents() == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. 
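ComputeNumSignBits counts how many copies of the sign bit sit at the top of the value. A standalone model of that count for int32_t, with a few values matching the rules above (a non-negative constant yields its leading-zero count; an arithmetic shift right by a constant adds that many sign bits):

    #include <cassert>
    #include <cstdint>

    unsigned signBits32(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V);
      unsigned Sign = U >> 31, N = 1;
      while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
        ++N;
      return N;
    }

    void checkSignBits() {
      assert(signBits32(-1) == 32);              // all-ones constant
      assert(signBits32(0x12345678) == 3);       // leading zeros of a constant
      assert(signBits32(0x12345678 >> 2) == 5);  // SRA X, 2 adds two sign bits
    }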
+ + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getSizeInBits(); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VTBits); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". 
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + + +bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + if (!GA) return false; + if (GA->getOffset() != 0) return false; + GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); + if (!GV) return false; + MachineModuleInfo *MMI = getMachineModuleInfo(); + return MMI && MMI->hasDebugInfo(); +} + + +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, + unsigned i) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getMaskElt(i) < 0) + return getUNDEF(VT.getVectorElementType()); + unsigned Index = N->getMaskElt(i); + unsigned NumElems = VT.getVectorNumElements(); + SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); + Index %= NumElems; + + if (V.getOpcode() == ISD::BIT_CONVERT) { + V = V.getOperand(0); + MVT VVT = V.getValueType(); + if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) + return SDValue(); + } + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Index == 0) ? V.getOperand(0) + : getUNDEF(VT.getVectorElementType()); + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Index); + if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V)) + return getShuffleScalarElt(SVN, Index); + return SDValue(); +} + + +/// getNode - Gets or creates the specified node. +/// +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, getVTList(VT)); + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + MVT VT, SDValue Operand) { + // Constant fold unary operations with an integer constant operand. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + const APInt &Val = C->getAPIntValue(); + unsigned BitWidth = VT.getSizeInBits(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + const uint64_t zero[] = {0, 0}; + // No compile time operations on this type. 
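The constant-folding block above means that building, say, a SIGN_EXTEND of a ConstantSDNode never creates an extend node at all; the widened constant is returned directly. What the sextOrTrunc/zextOrTrunc folds compute, shown on plain integers for an i8 source (assumes the usual two's-complement narrowing for the int8_t cast):

    #include <cassert>
    #include <cstdint>

    void checkExtendConstantFolds() {
      uint8_t C = 0xF0;
      // SIGN_EXTEND i8 0xF0 -> i32 0xFFFFFFF0
      assert(static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(C)))
             == 0xFFFFFFF0u);
      // ZERO_EXTEND / ANY_EXTEND i8 0xF0 -> i32 0x000000F0
      assert(static_cast<uint32_t>(C) == 0xF0u);
    }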
+ if (VT==MVT::ppcf128) + break; + APFloat apf = APFloat(APInt(BitWidth, 2, zero)); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(Val.bitsToFloat(), VT); + else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(Val.bitsToDouble(), VT); + break; + case ISD::BSWAP: + return getConstant(Val.byteSwap(), VT); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), VT); + case ISD::CTLZ: + return getConstant(Val.countLeadingZeros(), VT); + case ISD::CTTZ: + return getConstant(Val.countTrailingZeros(), VT); + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + APFloat V = C->getValueAPF(); // make copy + if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FP_ROUND: + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*MVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + APInt api(VT.getSizeInBits(), 2, x); + return getConstant(api, VT); + } + case ISD::BIT_CONVERT: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. + case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. 
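The two BIT_CONVERT folds above simply reinterpret the constant's bits in the other type. The same reinterpretation expressed in portable C++ (memcpy is the standard way to write a bit cast here):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    void checkBitConvertFold() {
      // i32 0x3F800000 and f32 1.0 are the same 32 bits.
      uint32_t Bits = 0x3F800000u;
      float F;
      std::memcpy(&F, &Bits, sizeof F);
      assert(F == 1.0f);
    }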
+ if (Operand.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, + Operand.getNode()->getOperand(0)); + break; + case ISD::ANY_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ANY_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert(Operand.getValueType().bitsLT(VT) + && "Invalid anyext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + break; + case ISD::TRUNCATE: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid TRUNCATE!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert(Operand.getValueType().bitsGT(VT) + && "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + else + return Operand.getNode()->getOperand(0); + } + break; + case ISD::BIT_CONVERT: + // Basic sanity checking. + assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() + && "Cannot BIT_CONVERT between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. + if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(VT.isVector() && !Operand.getValueType().isVector() && + (VT.getVectorElementType() == Operand.getValueType() || + (VT.getVectorElementType().isInteger() && + Operand.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(Operand.getValueType()))) && + "Illegal SCALAR_TO_VECTOR node!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. 
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (UnsafeFPMath && OpOpcode == ISD::FSUB) + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops, 1); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTs, Operand); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, + MVT VT, + ConstantSDNode *Cst1, + ConstantSDNode *Cst2) { + const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: return getConstant(C1 + C2, VT); + case ISD::SUB: return getConstant(C1 - C2, VT); + case ISD::MUL: return getConstant(C1 * C2, VT); + case ISD::UDIV: + if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); + break; + case ISD::UREM: + if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); + break; + case ISD::SDIV: + if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); + break; + case ISD::SREM: + if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); + break; + case ISD::AND: return getConstant(C1 & C2, VT); + case ISD::OR: return getConstant(C1 | C2, VT); + case ISD::XOR: return getConstant(C1 ^ C2, VT); + case ISD::SHL: return getConstant(C1 << C2, VT); + case ISD::SRL: return getConstant(C1.lshr(C2), VT); + case ISD::SRA: return getConstant(C1.ashr(C2), VT); + case ISD::ROTL: return getConstant(C1.rotl(C2), VT); + case ISD::ROTR: return getConstant(C1.rotr(C2), VT); + default: break; + } + + return SDValue(); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. 
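The memoization step above (FoldingSetNodeID plus CSEMap) is what keeps the graph a DAG: asking for a node with an opcode, value-type list, and operands that already exist hands back the existing node instead of allocating a new one. A toy model of that idea, with made-up types rather than the LLVM FoldingSet API:

    #include <map>
    #include <tuple>
    #include <vector>

    struct ToyNode { unsigned Opcode; int LHS, RHS; };

    struct ToyDAG {
      std::vector<ToyNode> Nodes;
      std::map<std::tuple<unsigned, int, int>, int> CSEMap; // key -> node index

      int getNode(unsigned Opcode, int LHS, int RHS) {
        auto Key = std::make_tuple(Opcode, LHS, RHS);
        auto It = CSEMap.find(Key);
        if (It != CSEMap.end())
          return It->second;              // reuse the identical existing node
        Nodes.push_back({Opcode, LHS, RHS});
        int Id = static_cast<int>(Nodes.size()) - 1;
        CSEMap.emplace(Key, Id);
        return Id;
      }
    };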
+ if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::AND: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + // fall through + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) { + if (Opcode == ISD::FADD) { + // 0+x --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) + if (CFP->getValueAPF().isZero()) + return N2; + // x+0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FSUB) { + // x-0 --> x + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2)) + if (CFP->getValueAPF().isZero()) + return N1; + } + } + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. + assert(N1.getValueType() == VT && + N1.getValueType().isFloatingPoint() && + N2.getValueType().isFloatingPoint() && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(VT.isInteger() && N2.getValueType().isInteger() && + "Shifts only work on integers"); + + // Always fold shifts of i1 values so the code generator doesn't need to + // handle them. Since we know the size of the shift has to be less than the + // size of the value, the shift/rotate count is guaranteed to be zero. + if (VT == MVT::i1) + return N1; + break; + case ISD::FP_ROUND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. 
+ break; + case ISD::AssertSext: + case ISD::AssertZext: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits(); + Val <<= Val.getBitWidth()-FromBits; + Val = Val.ashr(Val.getBitWidth()-FromBits); + return getConstant(Val, VT); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + if (Elt.getValueType() != VT) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + assert(VT.isInteger() && Elt.getValueType().isInteger() && + VT.bitsLE(Elt.getValueType()) && + "Bad type for BUILD_VECTOR operand"); + Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + } + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. + if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element. + if (N1.getOperand(2) == N2) + return N1.getOperand(1); + // If the indices are known different, extract the element from + // the original vector. + else if (isa<ConstantSDNode>(N1.getOperand(2)) && + isa<ConstantSDNode>(N2)) + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); + assert(!N1.getValueType().isVector() && !VT.isVector() && + (N1.getValueType().isInteger() == VT.isInteger()) && + "Wrong types for EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getZExtValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. 
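The SIGN_EXTEND_INREG constant fold above is the classic shift-up/shift-down trick: move the FromBits-wide field to the top of the word, then arithmetic-shift it back so its sign bit fills the upper bits. The same computation on int32_t (assumes the usual two's-complement, arithmetic right-shift behaviour that APInt::ashr guarantees):

    #include <cassert>
    #include <cstdint>

    int32_t signExtendInReg32(int32_t Val, unsigned FromBits) {
      unsigned Shift = 32 - FromBits;
      return static_cast<int32_t>(static_cast<uint32_t>(Val) << Shift) >> Shift;
    }

    void checkSignExtendInReg() {
      assert(signExtendInReg32(0x000000FF, 8) == -1);   // low byte 0xFF -> -1
      assert(signExtendInReg32(0x0000007F, 8) == 127);  // low byte 0x7F -> 127
    }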
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + unsigned ElementSize = VT.getSizeInBits(); + unsigned Shift = ElementSize * N2C->getZExtValue(); + APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + return getConstant(ShiftedVal.trunc(ElementSize), VT); + } + break; + case ISD::EXTRACT_SUBVECTOR: + if (N1.getValueType() == VT) // Trivial extraction. + return N1; + break; + } + + if (N1C) { + if (N2C) { + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); + if (SV.getNode()) return SV; + } else { // Cannonicalize constant to RHS if commutative + if (isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } + } + } + + // Constant fold FP operations. + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + if (N1CFP) { + if (!N2CFP && isCommutativeBinOp(Opcode)) { + // Cannonicalize constant to RHS if commutative + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } else if (N2CFP && VT != MVT::ppcf128) { + APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); + APFloat::opStatus s; + switch (Opcode) { + case ISD::FADD: + s = V1.add(V2, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FSUB: + s = V1.subtract(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FMUL: + s = V1.multiply(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp) + return getConstantFP(V1, VT); + break; + case ISD::FDIV: + s = V1.divide(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FREM : + s = V1.mod(V2, APFloat::rmNearestTiesToEven); + if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + return getConstantFP(V1, VT); + break; + case ISD::FCOPYSIGN: + V1.copySign(V2); + return getConstantFP(V1, VT); + default: break; + } + } + } + + // Canonicalize an UNDEF to the RHS, even over a constant. + if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::XOR: + if (N1.getOpcode() == ISD::UNDEF) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, VT); + // fallthrough + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return N2; // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. 
+ return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + switch (Opcode) { + case ISD::CONCAT_VECTORS: + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to + // one big BUILD_VECTOR. + if (N1.getOpcode() == ISD::BUILD_VECTOR && + N2.getOpcode() == ISD::BUILD_VECTOR && + N3.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + } + break; + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. + SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL); + if (Simp.getNode()) return Simp; + break; + } + case ISD::SELECT: + if (N1C) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + else + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::BRCOND: + if (N2C) { + if (N2C->getZExtValue()) // Unconditional branch + return getNode(ISD::BR, DL, MVT::Other, N1, N3); + else + return N1; // Never-taken branch + } + break; + case ISD::VECTOR_SHUFFLE: + assert(0 && "should use getVectorShuffle constructor!"); + break; + case ISD::BIT_CONVERT: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. 
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Flag) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops, 5); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, + DebugLoc dl) { + unsigned NumBits = VT.isVector() ? + VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = APInt(NumBits, C->getZExtValue() & 255); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + if (VT.isInteger()) + return DAG.getConstant(Val, VT); + return DAG.getConstantFP(APFloat(Val), VT); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Value = DAG.getNode(ISD::OR, dl, VT, + DAG.getNode(ISD::SHL, dl, VT, Value, + DAG.getConstant(Shift, + TLI.getShiftAmountTy())), + Value); + Shift <<= 1; + } + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { + // Handle vector with all elements zero. + if (Str.empty()) { + if (VT.isInteger()) + return DAG.getConstant(0, VT); + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts))); + } + + assert(!VT.isVector() && "Can't handle vector type here!"); + unsigned NumBits = VT.getSizeInBits(); + unsigned MSB = NumBits / 8; + uint64_t Val = 0; + if (TLI.isLittleEndian()) + Offset = Offset + MSB - 1; + for (unsigned i = 0; i != MSB; ++i) { + Val = (Val << 8) | (unsigned char)Str[Offset]; + Offset += TLI.isLittleEndian() ? -1 : 1; + } + return DAG.getConstant(Val, VT); +} + +/// getMemBasePlusOffset - Returns base and offset node for the +/// +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, + SelectionDAG &DAG) { + MVT VT = Base.getValueType(); + return DAG.getNode(ISD::ADD, Base.getDebugLoc(), + VT, Base, DAG.getConstant(Offset, VT)); +} + +/// isMemSrcFromString - Returns true if memcpy source is a string constant. 
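getMemsetValue above builds the store value for a memset by replicating the low byte of the value across the chosen type, doubling the replicated width on each iteration. The same doubling loop on uint32_t:

    #include <cassert>
    #include <cstdint>

    uint32_t splatMemsetByte32(uint8_t Byte) {
      uint32_t Value = Byte;
      unsigned Shift = 8;
      for (unsigned i = 32; i > 8; i >>= 1) {   // 0xAB -> 0xABAB -> 0xABABABAB
        Value = (Value << Shift) | Value;
        Shift <<= 1;
      }
      return Value;
    }

    void checkSplat() { assert(splatMemsetByte32(0xAB) == 0xABABABABu); }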
+/// +static bool isMemSrcFromString(SDValue Src, std::string &Str) { + unsigned SrcDelta = 0; + GlobalAddressSDNode *G = NULL; + if (Src.getOpcode() == ISD::GlobalAddress) + G = cast<GlobalAddressSDNode>(Src); + else if (Src.getOpcode() == ISD::ADD && + Src.getOperand(0).getOpcode() == ISD::GlobalAddress && + Src.getOperand(1).getOpcode() == ISD::Constant) { + G = cast<GlobalAddressSDNode>(Src.getOperand(0)); + SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + } + if (!G) + return false; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); + if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) + return true; + + return false; +} + +/// MeetsMaxMemopRequirement - Determines if the number of memory ops required +/// to replace the memset / memcpy is below the threshold. It also returns the +/// types of the sequence of memory ops to perform memset / memcpy. +static +bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, + SDValue Dst, SDValue Src, + unsigned Limit, uint64_t Size, unsigned &Align, + std::string &Str, bool &isSrcStr, + SelectionDAG &DAG, + const TargetLowering &TLI) { + isSrcStr = isMemSrcFromString(Src, Str); + bool isSrcConst = isa<ConstantSDNode>(Src); + bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); + MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr); + if (VT != MVT::iAny) { + unsigned NewAlign = (unsigned) + TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); + // If source is a string constant, this will require an unaligned load. + if (NewAlign > Align && (isSrcConst || AllowUnalign)) { + if (Dst.getOpcode() != ISD::FrameIndex) { + // Can't change destination alignment. It requires a unaligned store. + if (AllowUnalign) + VT = MVT::iAny; + } else { + int FI = cast<FrameIndexSDNode>(Dst)->getIndex(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (MFI->isFixedObjectIndex(FI)) { + // Can't change destination alignment. It requires a unaligned store. + if (AllowUnalign) + VT = MVT::iAny; + } else { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI) < NewAlign) + MFI->setObjectAlignment(FI, NewAlign); + Align = NewAlign; + } + } + } + } + + if (VT == MVT::iAny) { + if (AllowUnalign) { + VT = MVT::i64; + } else { + switch (Align & 7) { + case 0: VT = MVT::i64; break; + case 4: VT = MVT::i32; break; + case 2: VT = MVT::i16; break; + default: VT = MVT::i8; break; + } + } + + MVT LVT = MVT::i64; + while (!TLI.isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1); + assert(LVT.isInteger()); + + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. 
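MeetsMaxMemopRequirement above finishes by greedily splitting Size into chunks, dropping to the next smaller type whenever the current one no longer fits, and giving up if the operation count exceeds the target's limit. A sketch of that chunking with byte sizes instead of MVTs (the 8-byte starting width is just an example value):

    #include <cstdint>
    #include <vector>

    bool chunkMemop(uint64_t Size, unsigned Limit, std::vector<unsigned> &Chunks,
                    unsigned WidestBytes = 8) {
      unsigned Cur = WidestBytes;
      while (Size != 0) {
        while (Cur > Size)
          Cur >>= 1;                 // fall back to the next smaller power of two
        if (Chunks.size() + 1 > Limit)
          return false;              // too many memory ops; use a libcall instead
        Chunks.push_back(Cur);
        Size -= Cur;
      }
      return true;                   // e.g. Size = 15 -> {8, 4, 2, 1}
    }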
+ if (VT.isVector()) { + VT = MVT::i64; + while (!TLI.isTypeLegal(VT)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VTSize = VT.getSizeInBits() / 8; + } else { + VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VTSize >>= 1; + } + } + + if (++NumMemOps > Limit) + return false; + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memcpy to a series of load and store ops if the size operand falls + // below a certain threshold. + std::vector<MVT> MemOps; + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); + unsigned DstAlign = Align; // Destination alignment can change. + std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, + Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + + bool isZeroStr = CopyFromStr && Str.empty(); + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (CopyFromStr && (isZeroStr || !VT.isVector())) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We also handle store a vector with all zero's. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + } else { + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. + std::vector<MVT> MemOps; + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemmove(); + unsigned DstAlign = Align; // Destination alignment can change. 
+ std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, + Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + uint64_t SrcOff = 0, DstOff = 0; + + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. + std::vector<MVT> MemOps; + std::string Str; + bool CopyFromStr; + if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), + Size, Align, Str, CopyFromStr, DAG, TLI)) + return SDValue(); + + SmallVector<SDValue, 8> OutChains; + uint64_t DstOff = 0; + + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value = getMemsetValue(Src, VT, DAG, dl); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. 
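Note the ordering in getMemmoveLoadsAndStores above: unlike the memcpy expansion, every load is emitted and token-factored together before any store, because memmove's source and destination may overlap and an early store could clobber bytes that have not been read yet. The behavioural requirement it is preserving, in plain C++:

    #include <cassert>
    #include <cstring>

    void checkOverlappingMove() {
      char Buf[9] = "abcdefgh";
      std::memmove(Buf + 2, Buf, 4);                // overlapping ranges
      assert(std::memcmp(Buf, "ababcdgh", 8) == 0); // "abcd" arrives intact
    }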
+ SDValue Result = + TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + AlwaysInline, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, true, + DstSV, DstSVOff, SrcSV, SrcSVOff); + } + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memcpy", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), + Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, + DstSV, DstSVOff, SrcSV, SrcSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memmove", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, DstSV, DstSVOff); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. 
+ SDValue Result = + TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, + DstSV, DstSVOff); + if (Result.getNode()) + return Result; + + // Emit a library call. + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend or truncate the argument to be an i32 value for the call. + if (Src.getValueType().bitsGT(MVT::i32)) + Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else + Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, false, CallingConv::C, false, + getExternalSymbol("memset", TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, const Value* PtrVal, + unsigned Alignment) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + MVT VT = Cmp.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(MemVT); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, + Chain, Ptr, Cmp, Swp, PtrVal, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP) && + "Invalid Atomic Op"); + + MVT VT = Val.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(MemVT); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Val}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, + Chain, Ptr, Val, PtrVal, Alignment); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +/// Allowed to return something different (and simpler) if Simplify is true. 
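+/// A typical (purely illustrative) use merges a value and its chain into a
+/// single multi-result node:
+///   SDValue Parts[] = { Val, Chain };
+///   SDValue Merged = DAG.getMergeValues(Parts, 2, dl);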
+SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector<MVT, 4> VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, srcValue, SVOff, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + MVT MemVT, const Value *srcValue, int SVOff, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + // Memoize the node unless it returns a flag. + MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, + srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, + bool IsTailCall, bool IsInreg, SDVTList VTs, + const SDValue *Operands, unsigned NumOperands) { + // Do not include isTailCall in the folding set profile. + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); + ID.AddInteger(CallingConv); + ID.AddInteger(IsVarArgs); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + // Instead of including isTailCall in the folding set, we just + // set the flag of the existing node. + if (!IsTailCall) + cast<CallSDNode>(E)->setNotTailCall(); + return SDValue(E, 0); + } + SDNode *N = NodeAllocator.Allocate<CallSDNode>(); + new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, + VTs, Operands, NumOperands); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, + const Value *SV, int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + if (VT == EVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == EVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
+ if (VT.isVector()) + assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + "Invalid vector extload!"); + else + assert(EVT.bitsLT(VT) && + "Should only be an extending load, not truncating!"); + assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && + "Cannot sign/zero extend a FP/Vector load!"); + assert(VT.isInteger() == EVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && + "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? + getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); + ID.AddInteger(EVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, + Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + SV, SVOffset, VT, isVolatile, Alignment); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, + SDValue Chain, SDValue Ptr, + const Value *SV, + int SVOffset, MVT EVT, + bool isVolatile, unsigned Alignment) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + SV, SVOffset, EVT, isVolatile, Alignment); +} + +SDValue +SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); + assert(LD->getOffset().getOpcode() == ISD::UNDEF && + "Load is already a indexed load!"); + return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + LD->getChain(), Base, Offset, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), LD->getAlignment()); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, int SVOffset, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, + VT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, const Value *SV, + int SVOffset, MVT SVT, + bool isVolatile, unsigned Alignment) { + MVT VT = Val.getValueType(); + + if (VT == SVT) 
+ return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment); + + assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(VT.isInteger() == SVT.isInteger() && + "Can't do FP-INT conversion!"); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getMVTAlignment(VT); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, + isVolatile, Alignment)); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, + SVT, SV, SVOffset, Alignment, isVolatile); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + StoreSDNode *ST = cast<StoreSDNode>(OrigStore); + assert(ST->getOffset().getOpcode() == ISD::UNDEF && + "Store is already a indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); + new (N) StoreSDNode(Ops, dl, VTs, AM, + ST->isTruncatingStore(), ST->getMemoryVT(), + ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getAlignment(), ST->isVolatile()); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, + SDValue SV) { + SDValue Ops[] = { Chain, Ptr, SV }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDUse *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. 
+ SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps); + return getNode(Opcode, DL, VT, &NewOps[0], NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, + const SDValue *Ops, unsigned NumOps) { + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. + SDNode *N; + SDVTList VTs = getVTList(VT); + + if (VT != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); + void *IP = 0; + + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + CSEMap.InsertNode(N, IP); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const std::vector<MVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), + Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, + const MVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps) { + if (NumVTs == 1) + return getNode(Opcode, DL, VTs[0], Ops, NumOps); + return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.NumVTs == 1) + return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + + switch (Opcode) { + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } +#if 0 + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + case ISD::SHL_PARTS: + if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + else if (N3.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = VT.getSizeInBits()*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + } + break; +#endif + } + + // Memoize the node unless it returns a flag. 
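+ // Nodes whose last result type is MVT::Flag are deliberately kept out of
+ // the CSE map, since a flag result must not be shared between users.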
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = NodeAllocator.Allocate<UnarySDNode>(); + new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = NodeAllocator.Allocate<BinarySDNode>(); + new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = NodeAllocator.Allocate<TernarySDNode>(); + new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + } else { + N = NodeAllocator.Allocate<SDNode>(); + new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(MVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { + for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + MVT *Array = Allocator.Allocate<MVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { + 
for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I)
+ if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
+ I->VTs[2] == VT3 && I->VTs[3] == VT4)
+ return *I;
+
+ MVT *Array = Allocator.Allocate<MVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ SDVTList Result = makeVTList(Array, 4);
+ VTList.push_back(Result);
+ return Result;
+}
+
+SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) {
+ switch (NumVTs) {
+ case 0: assert(0 && "Cannot have nodes without results!");
+ case 1: return getVTList(VTs[0]);
+ case 2: return getVTList(VTs[0], VTs[1]);
+ case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
+ default: break;
+ }
+
+ for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
+ E = VTList.rend(); I != E; ++I) {
+ if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
+ continue;
+
+ bool NoMatch = false;
+ for (unsigned i = 2; i != NumVTs; ++i)
+ if (VTs[i] != I->VTs[i]) {
+ NoMatch = true;
+ break;
+ }
+ if (!NoMatch)
+ return *I;
+ }
+
+ MVT *Array = Allocator.Allocate<MVT>(NumVTs);
+ std::copy(VTs, VTs+NumVTs, Array);
+ SDVTList Result = makeVTList(Array, NumVTs);
+ VTList.push_back(Result);
+ return Result;
+}
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node; instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return InN;
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return InN;
+}
+
+SDValue SelectionDAG::
+UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
+ SDNode *N = InN.getNode();
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return InN; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = 0;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return SDValue(Existing, InN.getResNo());
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = 0;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDValue SelectionDAG:: +UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { + SDNode *N = InN.getNode(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return InN; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return SDValue(Existing, InN.getResNo()); + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return InN; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
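+/// For example (purely illustrative; MyTargetOpc stands in for a real machine
+/// opcode), an instruction selector could rewrite a node in place as:
+///   CurDAG->SelectNodeTo(N, MyTargetOpc, MVT::i32, N->getOperand(0));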
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, 0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode 
*SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return MorphNodeTo(N, Opc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return MorphNodeTo(N, Opc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return MorphNodeTo(N, Opc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + MVT VT1, MVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return MorphNodeTo(N, Opc, VTs, Ops, 3); +} + +/// MorphNodeTo - These *mutate* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. Note that the DebugLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + // If an identical node already exists, use it. + void *IP = 0; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + return ON; + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = 0; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet<SDNode*, 16> DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. 
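+ // When the node shrinks, the existing operand array is simply reused;
+ // only growth forces a fresh allocation below.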
+ if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + if (N->isMachineOpcode()) { + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps); + N->OperandsNeedDelete = false; + } else { + N->OperandList = new SDUse[NumOps]; + N->OperandsNeedDelete = true; + } + } + + // Assign the new operands. + N->NumOperands = NumOps; + for (unsigned i = 0, e = NumOps; i != e; ++i) { + N->OperandList[i].setUser(N); + N->OperandList[i].setInitial(Ops[i]); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getTargetNode - These are used for target selectors to create a new node +/// with specified return type(s), target opcode, and operands. +/// +/// Note that getTargetNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { + return getNode(~Opcode, dl, VT).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1) { + return getNode(~Opcode, dl, VT, Op1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2) { + return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3) { + return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Op; + return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue 
Ops[] = { Op1, Op2 }; + return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + MVT VT1, MVT VT2, MVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, + MVT VT2, MVT VT3, MVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); +} + +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, + const std::vector<MVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From has a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, + DAGUpdateListener *UpdateListener) { + SDNode *From = FromN.getNode(); + assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && + "Cannot replace with this method!"); + assert(From != To.getNode() && "Cannot replace uses of with self"); + + // Iterate over all the existing uses of From. New uses will be added + // to the beginning of the use list, which we avoid visiting. + // This specifically avoids visiting uses of From that arise while the + // replacement is happening, because any such uses would be the result + // of CSE: If an existing node looks like From after one of its operands + // is replaced by To, we don't want to replace of all its users with To + // too. See PR3018 for more info. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes that for each value of From, there is a +/// corresponding value in To in the same position with the same type. 
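+/// For example (purely illustrative), a combine that has built a replacement
+/// node NewN for OldN with identical result types might simply call
+///   DAG.ReplaceAllUsesWith(OldN, NewN, 0);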
+/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, + DAGUpdateListener *UpdateListener) { +#ifndef NDEBUG + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + assert((!From->hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && + "Cannot use this version of ReplaceAllUsesWith!"); +#endif + + // Handle the trivial case. + if (From == To) + return; + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDValue *To, + DAGUpdateListener *UpdateListener) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, + DAGUpdateListener *UpdateListener){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To, UpdateListener); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. 
+ SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. + if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num, + DAGUpdateListener *UpdateListener){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector<UseMemo, 4> Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. + SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. 
+ do { + unsigned i = Uses[UseIndex].Index; + SDUse &Use = *Uses[UseIndex].Use; + ++UseIndex; + + Use.set(To[i]); + } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG +/// based on their topological order. It returns the maximum id and a vector +/// of the SDNodes* in assigned order by reference. +unsigned SelectionDAG::AssignTopologicalOrder() { + + unsigned DAGSize = 0; + + // SortedPos tracks the progress of the algorithm. Nodes before it are + // sorted, nodes after it are unsorted. When the algorithm completes + // it is at the end of the list. + allnodes_iterator SortedPos = allnodes_begin(); + + // Visit all the nodes. Move nodes with no operands to the front of + // the list immediately. Annotate nodes that do have operands with their + // operand count. Before we do this, the Node Id fields of the nodes + // may contain arbitrary values. After, the Node Id fields for nodes + // before SortedPos will contain the topological sort index, and the + // Node Id fields for nodes At SortedPos and after will contain the + // count of outstanding operands. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { + SDNode *N = I++; + unsigned Degree = N->getNumOperands(); + if (Degree == 0) { + // A node with no uses, add it to the result array immediately. + N->setNodeId(DAGSize++); + allnodes_iterator Q = N; + if (Q != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); + ++SortedPos; + } else { + // Temporarily use the Node Id as scratch space for the degree count. + N->setNodeId(Degree); + } + } + + // Visit all the nodes. As we iterate, moves nodes into sorted order, + // such that by the time the end is reached all nodes will be sorted. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { + SDNode *N = I; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *P = *UI; + unsigned Degree = P->getNodeId(); + --Degree; + if (Degree == 0) { + // All of P's operands are sorted, so P may sorted now. + P->setNodeId(DAGSize++); + if (P != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); + ++SortedPos; + } else { + // Update P's outstanding operand count. 
+ P->setNodeId(Degree); + } + } + } + + assert(SortedPos == AllNodes.end() && + "Topological sort incomplete!"); + assert(AllNodes.front().getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token!"); + assert(AllNodes.front().getNodeId() == 0 && + "First node in topological sort has non-zero id!"); + assert(AllNodes.front().getNumOperands() == 0 && + "First node in topological sort has operands!"); + assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && + "Last node in topologic sort has unexpected id!"); + assert(AllNodes.back().use_empty() && + "Last node in topologic sort has users!"); + assert(DAGSize == allnodes_size() && "Node count mismatch!"); + return DAGSize; +} + + + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +HandleSDNode::~HandleSDNode() { + DropOperands(); +} + +GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA, + MVT VT, int64_t o) + : SDNode(isa<GlobalVariable>(GA) && + cast<GlobalVariable>(GA)->isThreadLocal() ? + // Thread Local + (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) : + // Non Thread Local + (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress), + DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) { + TheGlobal = const_cast<GlobalValue*>(GA); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt, + const Value *srcValue, int SVO, + unsigned alignment, bool vol) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); + assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); + assert(getAlignment() == alignment && "Alignment representation error!"); + assert(isVolatile() == vol && "Volatile representation error!"); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, + const SDValue *Ops, + unsigned NumOps, MVT memvt, const Value *srcValue, + int SVO, unsigned alignment, bool vol) + : SDNode(Opc, dl, VTs, Ops, NumOps), + MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); + assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); + assert(getAlignment() == alignment && "Alignment representation error!"); + assert(isVolatile() == vol && "Volatile representation error!"); +} + +/// getMemOperand - Return a MachineMemOperand object describing the memory +/// reference performed by this memory reference. 
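+/// The flags are derived from the kind of node (load, store, atomic, or
+/// memory intrinsic), the size from the memory value type, and frame-index
+/// addresses are described with a fixed-stack pseudo source value.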
+MachineMemOperand MemSDNode::getMemOperand() const { + int Flags = 0; + if (isa<LoadSDNode>(this)) + Flags = MachineMemOperand::MOLoad; + else if (isa<StoreSDNode>(this)) + Flags = MachineMemOperand::MOStore; + else if (isa<AtomicSDNode>(this)) { + Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + } + else { + const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this); + assert(MemIntrinNode && "Unknown MemSDNode opcode!"); + if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad; + if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore; + } + + int Size = (getMemoryVT().getSizeInBits() + 7) >> 3; + if (isVolatile()) Flags |= MachineMemOperand::MOVolatile; + + // Check if the memory reference references a frame index + const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode()); + if (!getSrcValue() && FI) + return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()), + Flags, 0, Size, getAlignment()); + else + return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(), + Size, getAlignment()); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const MVT *SDNode::getValueTypeList(MVT VT) { + if (VT.isExtended()) { + static std::set<MVT, MVT::compareRawBits> EVTs; + return &(*EVTs.insert(VT).first); + } else { + static MVT VTs[MVT::LAST_VALUETYPE]; + VTs[VT.getSimpleVT()] = VT; + return &VTs[VT.getSimpleVT()]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. +bool SDNode::hasAnyUseOfValue(unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) + if (UI.getUse().getResNo() == Value) + return true; + + return false; +} + + +/// isOnlyUserOf - Return true if this node is the only use of N. +/// +bool SDNode::isOnlyUserOf(SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. 
+/// +bool SDValue::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (*this == N->getOperand(i)) + return true; + return false; +} + +bool SDNode::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->NumOperands; i != e; ++i) + if (this == N->OperandList[i].getNode()) + return true; + return false; +} + +/// reachesChainWithoutSideEffects - Return true if this operand (which must +/// be a chain) reaches the specified operand without crossing any +/// side-effecting instructions. In practice, this looks through token +/// factors and non-volatile loads. In order to remain efficient, this only +/// looks a couple of nodes in, it does not do an exhaustive search. +bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, + unsigned Depth) const { + if (*this == Dest) return true; + + // Don't search too deeply, we just want to be able to see through + // TokenFactor's etc. + if (Depth == 0) return false; + + // If this is a token factor, all inputs to the TF happen in parallel. If any + // of the operands of the TF reach dest, then we can do the xform. + if (getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return true; + return false; + } + + // Loads don't have side effects, look through them. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { + if (!Ld->isVolatile()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + + +static void findPredecessor(SDNode *N, const SDNode *P, bool &found, + SmallPtrSet<SDNode *, 32> &Visited) { + if (found || !Visited.insert(N)) + return; + + for (unsigned i = 0, e = N->getNumOperands(); !found && i != e; ++i) { + SDNode *Op = N->getOperand(i).getNode(); + if (Op == P) { + found = true; + return; + } + findPredecessor(Op, P, found, Visited); + } +} + +/// isPredecessorOf - Return true if this node is a predecessor of N. This node +/// is either an operand of N or it can be reached by recursively traversing +/// up the operands. +/// NOTE: this is an expensive method. Use it carefully. 
+bool SDNode::isPredecessorOf(SDNode *N) const { + SmallPtrSet<SDNode *, 32> Visited; + bool found = false; + findPredecessor(N, this, found, Visited); + return found; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->get(getMachineOpcode()).getName(); + return "<<Unknown Machine Node>>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node>>"; + } + return "<<Unknown Node>>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: + return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MEMOPERAND: return "MemOperand"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::ARG_FLAGS: return "ArgFlags"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue(); + return Intrinsic::getName((Intrinsic::ID)IID); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case 
ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::DBG_LABEL: return "dbg_label"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::DECLARE: return "declare"; + case ISD::HANDLENODE: return "handlenode"; + case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; + case ISD::CALL: return "call"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FPOWI: return "fpowi"; + case ISD::FPOW: return "fpow"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BIT_CONVERT: return "bit_convert"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: assert(0 && "Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::RET: return "ret"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Debug info + case ISD::DBG_STOPPOINT: return "dbg_stoppoint"; + case ISD::DEBUG_LOC: return "debug_loc"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: assert(0 && "Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: + return ""; + case ISD::PRE_INC: + return "<pre-inc>"; + case ISD::PRE_DEC: + return "<pre-dec>"; + case ISD::POST_INC: + return "<post-inc>"; + case ISD::POST_DEC: + return "<post-dec>"; + } +} + +std::string ISD::ArgFlagsTy::getArgFlagsString() { + std::string S = "< "; + + if 
(isZExt()) + S += "zext "; + if (isSExt()) + S += "sext "; + if (isInReg()) + S += "inreg "; + if (isSRet()) + S += "sret "; + if (isByVal()) + S += "byval "; + if (isNest()) + S += "nest "; + if (getByValAlign()) + S += "byval-align:" + utostr(getByValAlign()) + " "; + if (getOrigAlign()) + S += "orig-align:" + utostr(getOrigAlign()) + " "; + if (getByValSize()) + S += "byval-size:" + utostr(getByValSize()) + " "; + return S + ">"; +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(errs(), G); +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getMVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this); + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + if (G && R->getReg() && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + OS << " " << G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + OS << " #" << R->getReg(); + } + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MemOperandSDNode *M = 
dyn_cast<MemOperandSDNode>(this)) { + if (M->MO.getValue()) + OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; + else + OS << "<null:" << M->MO.getOffset() << ">"; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) { + OS << N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getMVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + const Value *SrcValue = LD->getSrcValue(); + int SrcOffset = LD->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << " <anyext "; break; + case ISD::SEXTLOAD: OS << " <sext "; break; + case ISD::ZEXTLOAD: OS << " <zext "; break; + } + if (doExt) + OS << LD->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (LD->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << LD->getAlignment(); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + const Value *SrcValue = ST->getSrcValue(); + int SrcOffset = ST->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + + if (ST->isTruncatingStore()) + OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">"; + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << " " << AM; + if (ST->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << ST->getAlignment(); + } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) { + const Value *SrcValue = AT->getSrcValue(); + int SrcOffset = AT->getSrcValueOffset(); + OS << " <"; + if (SrcValue) + OS << SrcValue; + else + OS << "null"; + OS << ":" << SrcOffset << ">"; + if (AT->isVolatile()) + OS << " <volatile>"; + OS << " alignment=" << AT->getAlignment(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + OS << " "; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + cerr << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + + cerr << "\n" << std::string(indent, ' '); + N->dump(G); +} + +void SelectionDAG::dump() const { + cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + cerr << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, 
return now. + return; + // Dump the current SDNode, but don't end the line yet. + OS << std::string(indent, ' '); + N->printr(OS, G); + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + if (i) OS << ","; + OS << " "; + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + OS << "\n"; + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, 0, once); +} + + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +const Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits) { + MVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. + unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < nOps; ++i) { + SDValue OpVal = getOperand(i); + unsigned BitPos = i * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos); + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); + APInt LowValue = APInt(SplatValue).trunc(HalfSize); + APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. 
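    // (Added worked example, not from the original source: a <16 x i8>
    // build_vector with every element 0x01 enters this loop with sz == 128
    // and SplatValue 0x0101...01; on each iteration the two halves agree,
    // so sz keeps halving until it reaches 8 and the node is reported as an
    // 8-bit splat of 0x01. Elements that are ISD::UNDEF only set bits in
    // SplatUndef, which the masking below excludes from the comparison.)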
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { + // Find the first non-undef value in the shuffle mask. + unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp new file mode 100644 index 000000000000..889d7f5dd934 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -0,0 +1,6052 @@ +//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "SelectionDAGBuild.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). 
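///
/// Added usage note, not from the original source: the variable is tied
/// below to the cl::opt named "limit-float-precision", so any tool that
/// links this library and parses command-line options can set it; a
/// hypothetical invocation would be "llc -limit-float-precision=12
/// input.bc". The default of 0 presumably leaves the full-precision
/// libcall lowering in place.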
+static unsigned LimitFloatPrecision; + +static cl::opt<unsigned, true> +LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), + cl::init(0)); + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex = 0) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// MVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<MVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets = 0, + uint64_t StartingOffset = 0) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty == Type::VoidTy) + return; + // Base case: we can get an MVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +namespace llvm { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information about + /// the value. 
The most common situation is to represent one value at a time, + /// but struct or array values are handled element-wise as multiple values. + /// The splitting of aggregates is performed recursively, so that we never + /// have aggregate-typed registers. The values at this point do not necessarily + /// have legal types, so each value may require one or more registers of some + /// legal type. + /// + struct VISIBILITY_HIDDEN RegsForValue { + /// TLI - The TargetLowering object. + /// + const TargetLowering *TLI; + + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<MVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<MVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector<unsigned, 4> Regs; + + RegsForValue() : TLI(0) {} + + RegsForValue(const TargetLowering &tli, + const SmallVector<unsigned, 4> ®s, + MVT regvt, MVT valuevt) + : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + const SmallVector<unsigned, 4> ®s, + const SmallVector<MVT, 4> ®vts, + const SmallVector<MVT, 4> &valuevts) + : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + RegsForValue(const TargetLowering &tli, + unsigned Reg, const Type *Ty) : TLI(&tli) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = TLI->getRegisterType(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + TLI = RHS.TLI; + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. 
This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Code, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + }; +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa<PHINode>(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + // Don't force virtual registers for byval arguments though, because + // fast-isel can't handle those in all cases. + if (EnableFastISel && !A->hasByValAttr()) + return A->use_empty(); + + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) + return false; // Use not in entry block. + return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, + SelectionDAG &DAG, + bool EnableFastISel) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa<AllocaInst>(I) || + !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. 
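  // (Added illustration, not from the original source: assuming a target
  // where an i64 value needs two registers, an i64 PHI that is used outside
  // its block gets NumRegisters == 2 below, so two TargetInstrInfo::PHI
  // machine instructions are created, defining PHIReg and PHIReg+1; their
  // incoming operands are filled in later, as noted above.)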
+ for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + PHINode *PN; + DebugLoc DL; + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *F = CI->getCalledFunction()) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); + + if (DIDescriptor::ValidDebugInfo(SPI->getContext(), + CodeGenOpt::Default)) { + DICompileUnit CU(cast<GlobalVariable>(SPI->getContext())); + unsigned idx = MF->getOrCreateDebugLocID(CU.getGV(), + SPI->getLine(), + SPI->getColumn()); + DL = DebugLoc::get(idx); + } + + break; + } + case Intrinsic::dbg_func_start: { + DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI->getSubprogram(); + + if (DIDescriptor::ValidDebugInfo(SP, CodeGenOpt::Default)) { + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CU(Subprogram.getCompileUnit()); + unsigned Line = Subprogram.getLineNumber(); + DL = DebugLoc::get(MF->getOrCreateDebugLocID(CU.getGV(), + Line, 0)); + } + + break; + } + } + } + } + + PN = dyn_cast<PHINode>(I); + if (!PN || PN->use_empty()) continue; + + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } +} + +unsigned FunctionLoweringInfo::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, V->getType(), ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI.getRegisterType(ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = MakeReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). 
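///
/// An added example, not from the original source: assembling an i64 value
/// from two i32 parts on a little-endian target takes Parts[0] as the low
/// half and Parts[1] as the high half and joins them with ISD::BUILD_PAIR;
/// passing ISD::AssertZext as AssertOp instead marks a combined value that
/// is wider than ValueVT as having zero upper bits before the final
/// TRUNCATE back down to ValueVT.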
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, MVT ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (!ValueVT.isVector() && ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + MVT RoundVT = RoundBits == ValueBits ? + ValueVT : MVT::getIntegerVT(RoundBits); + SDValue Lo, Hi; + + MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); + Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2, + PartVT, HalfVT); + } else { + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + } + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + Hi = getCopyFromParts(DAG, dl, + Parts+RoundParts, OddParts, PartVT, OddVT); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); + } + } else if (ValueVT.isVector()) { + // Handle a multi-element vector. + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate operands + // from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate + // operands. + Val = DAG.getNode(IntermediateVT.isVector() ? 
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, + ValueVT, &Ops[0], NumIntermediates); + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + assert(ValueVT.isVector() && "Unknown vector conversion!"); + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + } + + if (ValueVT.isVector()) { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); + } + + if (PartVT.isInteger() && + ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. + if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, dl, PartVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); + } + } + + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (ValueVT.bitsLT(Val.getValueType())) + // FP_ROUND's are always exact here. + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); + } + + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); + + assert(0 && "Unknown mismatch!"); + return SDValue(); +} + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(); + MVT ValueVT = Val.getValueType(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (!NumParts) + return; + + if (!ValueVT.isVector()) { + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. 
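      // (Added example, not from the original source: copying an i16 value
      // into a single i32 part takes this path; the value is widened with
      // ExtendKind, which is ANY_EXTEND unless the caller asked for sign or
      // zero extension, so that it exactly tiles the parts.)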
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); + } else if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + if (PartVT.isInteger() && ValueVT.isInteger()) { + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } else { + assert(0 && "Unknown mismatch!"); + } + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, + DAG.getConstant(RoundBits, + TLI.getPointerTy())); + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + NumParts = RoundParts; + ValueVT = MVT::getIntegerVT(NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::getIntegerVT(ValueVT.getSizeInBits()), + Val); + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + MVT ThisVT = MVT::getIntegerVT (ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(1, PtrVT)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + ThisVT, Part0, + DAG.getConstant(0, PtrVT)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, + PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); + + return; + } + + // Vector ValueVT. + if (NumParts == 1) { + if (PartVT != ValueVT) { + if (PartVT.isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); + } else { + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + PartVT, Val, + DAG.getConstant(0, PtrVT)); + } + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. 
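  // (Added illustration, not from the original source: assuming a target
  // whose widest legal vector type here is v2f32, a v4f32 value gets
  // IntermediateVT == v2f32 and NumIntermediates == 2 from
  // getVectorTypeBreakdown, and the loop below slices it with
  // EXTRACT_SUBVECTOR at element offsets 0 and 2 before each piece is
  // copied out as parts.)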
+ MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI + .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), + PtrVT)); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + IntermediateVT, Val, + DAG.getConstant(i, PtrVT)); + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT); + } +} + + +void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} + +/// clear - Clear out the curret SelectionDAG and the associated +/// state and prepare this SelectionDAGLowering object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGLowering::clear() { + NodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DAG.clear(); + CurDebugLoc = DebugLoc::getUnknownLoc(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGLowering::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGLowering::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. 
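  // (Added note, not from the original source: if, say, two CopyToReg
  // exports are pending and neither already has the current root as its
  // chain operand, the root is appended as one more operand below, so the
  // resulting TokenFactor orders the terminator after both copies and the
  // previous root.)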
+ if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGLowering::visit(Instruction &I) { + visit(I.getOpcode(), I); +} + +void SelectionDAGLowering::visit(unsigned Opcode, User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: assert(0 && "Unknown instruction type encountered!"); + abort(); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); +#include "llvm/Instruction.def" + } +} + +void SelectionDAGLowering::visitAdd(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FADD); + else + visitBinary(I, ISD::ADD); +} + +void SelectionDAGLowering::visitMul(User &I) { + if (I.getType()->isFPOrFPVector()) + visitBinary(I, ISD::FMUL); + else + visitBinary(I, ISD::MUL); +} + +SDValue SelectionDAGLowering::getValue(const Value *V) { + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + MVT VT = TLI.getValueType(V->getType(), true); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return N = DAG.getConstant(*CI, VT); + + if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return N = DAG.getGlobalAddress(GV, VT); + + if (isa<ConstantPointerNull>(C)) + return N = DAG.getConstant(0, TLI.getPointerTy()); + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return N = DAG.getConstantFP(*CFP, VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return N = DAG.getUNDEF(VT); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the ValueMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + MVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc()); + } + + const VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned 
NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector<SDValue, 16> Ops; + if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + MVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + unsigned InReg = FuncInfo.ValueMap[V]; + assert(InReg && "Value not in map!"); + + RegsForValue RFV(TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); +} + + +void SelectionDAGLowering::visitRet(ReturnInst &I) { + if (I.getNumOperands() == 0) { + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), + MVT::Other, getControlRoot())); + return; + } + + SmallVector<SDValue, 8> NewValues; + NewValues.push_back(getControlRoot()); + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) continue; + + SDValue RetOp = getValue(I.getOperand(i)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + MVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(VT); + MVT PartVT = TLI.getRegisterType(VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + for (unsigned i = 0; i < NumParts; ++i) { + NewValues.push_back(Parts[i]); + NewValues.push_back(DAG.getArgFlags(Flags)); + } + } + } + DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, + &NewValues[0], NewValues.size())); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. 
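The getCopyToParts call in visitRet above breaks each return value into however many legal register-sized pieces the target requires. As a rough standalone sketch, not part of this commit and assuming a hypothetical little-endian target whose widest legal integer register is 32 bits, an i64 return value would be decomposed like this:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t value = 0x1122334455667788ULL;   // an i64 return value
      uint32_t parts[2];                        // two i32 register-sized parts
      parts[0] = uint32_t(value);               // low part first on this hypothetical target
      parts[1] = uint32_t(value >> 32);         // high part
      std::printf("%#x %#x\n", (unsigned)parts[0], (unsigned)parts[1]);  // 0x55667788 0x11223344
      return 0;
    }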
+void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { + if (!V->use_empty()) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + assert(0 && "Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
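The FOC/FPC pair in getFCmpCondCode above separates ordered condition codes, which are false whenever either operand is NaN, from their unordered counterparts, which are true in that case; FiniteOnlyFPMath() merely licenses the cheaper ordered form. A small standalone illustration of the difference, not taken from this patch:

    #include <cmath>
    #include <cstdio>

    // SETOLT: ordered and less-than.
    static bool fcmp_olt(double a, double b) {
      return !std::isnan(a) && !std::isnan(b) && a < b;
    }

    // SETULT: unordered or less-than.
    static bool fcmp_ult(double a, double b) {
      return std::isnan(a) || std::isnan(b) || a < b;
    }

    int main() {
      double nan = std::nan("");
      std::printf("olt(1,2)=%d ult(1,2)=%d\n", fcmp_olt(1, 2), fcmp_ult(1, 2));         // 1 1
      std::printf("olt(nan,2)=%d ult(nan,2)=%d\n", fcmp_olt(nan, 2), fcmp_ult(nan, 2)); // 0 1
      return 0;
    }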
+/// +static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + assert(0 && "Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == CurMBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + assert(0 && "Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGLowering::FindMergedConditions(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. + Instruction *BOp = dyn_cast<Instruction>(Cond); + if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. 
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + return true; +} + +void SelectionDAGLowering::visitBr(BranchInst &I) { + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + CurMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0]); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. 
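The rewrite described in the visitBr comment above can be restated in plain C++; the following standalone sketch, illustrative only and not from the patch, checks that branching on each comparison separately is equivalent to materializing the OR of the two setcc results:

    #include <cassert>

    static bool materialized(int a, int b, int d, int e) {
      bool c = (a == b);
      bool f = (d <= e);
      return c | f;                 // or C, F ; jnz foo
    }

    static bool asBranches(int a, int b, int d, int e) {
      if (a == b) return true;      // cmp A, B ; je foo
      if (d <= e) return true;      // cmp D, E ; jle foo
      return false;
    }

    int main() {
      for (int a = 0; a < 3; ++a)
        for (int d = 0; d < 3; ++d)
          assert(materialized(a, 1, d, 1) == asBranches(a, 1, d, 1));
      return 0;
    }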
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + NULL, Succ0MBB, Succ1MBB, CurMBB); + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + MVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); + } + } + + // Update successor info + CurMBB->addSuccessor(CB.TrueBB); + CurMBB->addSuccessor(CB.FalseBB); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // If the branch was constant folded, fix up the CFG. + if (BrCond.getOpcode() == ISD::BR) { + CurMBB->removeSuccessor(CB.FalseBB); + DAG.setRoot(BrCond); + } else { + // Otherwise, go ahead and insert the false branch. 
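The CmpMHS path of visitSwitchCase above collapses a two-sided range test into a single unsigned comparison after a subtraction. A minimal standalone sketch of the identity it relies on, not part of this commit and with made-up helper names:

    #include <cassert>
    #include <cstdint>

    static bool inRangeTwoCompares(int32_t x, int32_t lo, int32_t hi) {
      return lo <= x && x <= hi;
    }

    static bool inRangeOneCompare(int32_t x, int32_t lo, int32_t hi) {
      // SUB followed by SETULE, the same shape emitted above.
      return uint32_t(x) - uint32_t(lo) <= uint32_t(hi) - uint32_t(lo);
    }

    int main() {
      for (int32_t x = -20; x <= 20; ++x)
        assert(inRangeTwoCompares(x, -3, 4) == inRangeOneCompare(x, -3, 4));
      return 0;
    }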
+ if (BrCond == getControlRoot()) + CurMBB->removeSuccessor(CB.TrueBB); + + if (CB.FalseBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB))); + } +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + MVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index)); +} + +/// visitJumpTableHeader - This function emits the code necessary to produce the index +/// into the jump table from the switch case value. +void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH) { + // Subtract the lowest switch case value from the value being switched on and + // conditionally branch to the default MBB if the result is greater than the + // difference between smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncation. + if (VT.bitsGT(TLI.getPointerTy())) + SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), SUB, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block.
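Taken together, visitJumpTableHeader and visitJumpTable emit the classic table dispatch: subtract the smallest case value, do one unsigned range check that falls back to the default block, then branch indirectly through the table. A standalone C++ sketch of that shape, with hypothetical case values and not taken from the patch:

    #include <cstdio>

    static const char *handleCase(int x) {
      static const char *const table[] = {"case 10", "case 11", "case 12", "case 13"};
      const int First = 10, Last = 13;
      unsigned idx = unsigned(x - First);        // SUB, then zext/trunc to pointer width
      if (idx > unsigned(Last - First))          // SETUGT -> branch to the default block
        return "default";
      return table[idx];                         // BR_JT: indirect branch through the table
    }

    int main() {
      std::printf("%s %s %s\n", handleCase(9), handleCase(12), handleCase(40));
      // Prints: default case 12 default
      return 0;
    }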
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + if (JT.MBB == NextBlock) + DAG.setRoot(BrCond); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB))); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + MVT VT = SwitchOp.getValueType(); + SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); + + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(SUB.getValueType()), + SUB, DAG.getConstant(B.Range, VT), + ISD::SETUGT); + + SDValue ShiftOp; + if (VT.bitsGT(TLI.getPointerTy())) + ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + else + ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), SUB); + + B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, ShiftOp); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + CurMBB->addSuccessor(B.Default); + CurMBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB == NextBlock) + DAG.setRoot(BrRange); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB))); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B) { + // Make desired shift + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, + TLI.getPointerTy()); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + + CurMBB->addSuccessor(B.TargetBB); + CurMBB->addSuccessor(NextMBB); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + AndCmp, DAG.getBasicBlock(B.TargetBB)); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. 
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + if (NextMBB == NextBlock) + DAG.setRoot(BrAnd); + else + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB))); +} + +void SelectionDAGLowering::visitInvoke(InvokeInst &I) { + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + + const Value *Callee(I.getCalledValue()); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else + LowerCallTo(&I, getValue(Callee), false, LandingPad); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + CopyToExportRegsIfNeeded(&I); + + // Update successor info + CurMBB->addSuccessor(Return); + CurMBB->addSuccessor(LandingPad); + + // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGLowering::visitUnwind(UnwindInst &I) { +} + +/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for +/// small case ranges). +bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& BackCase = *(CR.Range.second-1); + + // Size is the number of Cases represented by this range. + size_t Size = CR.Range.second - CR.Range.first; + if (Size > 3) + return false; + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + // TODO: If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + + // Rearrange the case blocks so that the last one falls through if possible. + if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + if (I->BB == NextBlock) { + std::swap(*I, BackCase); + break; + } + } + } + + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CR.CaseBB; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + MachineBasicBlock *FallThrough; + if (I != E-1) { + FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); + CurMF->insert(BBI, FallThrough); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } else { + // If the last case doesn't match, go to the default block. 
+ FallThrough = Default; + } + + Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->High == I->Low) { + // This is just small small case range :) containing exactly 1 case + CC = ISD::SETEQ; + LHS = SV; RHS = I->High; MHS = NULL; + } else { + CC = ISD::SETLE; + LHS = I->Low; MHS = SV; RHS = I->High; + } + CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + + // If emitting the first comparison, just call visitSwitchCase to emit the + // code into the current block. Otherwise, push the CaseBlock onto the + // vector to be later processed by SDISel, and insert the node's MBB + // before the next MBB. + if (CurBlock == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + CurBlock = FallThrough; + } + + return true; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return !DisableJumpTables && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +static APInt ComputeRange(const APInt &First, const APInt &Last) { + APInt LastExt(Last), FirstExt(First); + uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; + LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + return (LastExt - FirstExt + 1ULL); +} + +/// handleJTSwitchCase - Emit jumptable for current switch case range +bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + + size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + if (!areJTsAllowed(TLI) || TSize <= 3) + return false; + + APInt Range = ComputeRange(First, Last); + double Density = (double)TSize / Range.roundToDouble(); + if (Density < 0.4) + return false; + + DEBUG(errs() << "Lowering jump table\n" + << "First entry: " << First << ". Last entry: " << Last << '\n' + << "Range: " << Range + << "Size: " << TSize << ". Density: " << Density << "\n\n"); + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Create a new basic block to hold the code for loading the address + // of the jump table, and jumping to it. Update successor information; + // we will either branch to the default case for the switch, or the jump + // table. + MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + CR.CaseBB->addSuccessor(Default); + CR.CaseBB->addSuccessor(JumpTableBB); + + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB. 
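The profitability test in handleJTSwitchCase above boils down to two numbers: more than three case values, and at least 40% density over the span from the first to the last value. A minimal standalone rendering of that check, simplified to single-valued cases and not part of the patch:

    #include <cstdio>

    static bool worthJumpTable(unsigned numCaseValues, long long first, long long last) {
      if (numCaseValues <= 3)
        return false;
      double range = double(last - first + 1);
      double density = double(numCaseValues) / range;
      return density >= 0.4;
    }

    int main() {
      std::printf("%d\n", worthJumpTable(5, 1, 8));     // dense: 5/8 = 0.625 -> 1
      std::printf("%d\n", worthJumpTable(5, 1, 1000));  // sparse: 5/1000    -> 0
      return 0;
    }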
+ std::vector<MachineBasicBlock*> DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt& Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt& High = cast<ConstantInt>(I->High)->getValue(); + + if (Low.sle(TEI) && TEI.sle(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } + } + + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } + + // Create a jump table index for this jump table, or return an existing + // one. + unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); + + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); + if (CR.CaseBB == CurMBB) + visitJumpTableHeader(JT, JTH); + + JTCases.push_back(JumpTableBlock(JTH, JT)); + + return true; +} + +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; + + if (++BBI != CurMBB->getParent()->end()) + NextBlock = BBI; + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; + + const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; + + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
+ size_t TSize = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); + + size_t LSize = FrontCase.size(); + size_t RSize = TSize-LSize; + DEBUG(errs() << "Selecting best pivot: \n" + << "First: " << First << ", Last: " << Last <<'\n' + << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; + J!=E; ++I, ++J) { + const APInt& LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue(); + APInt Range = ComputeRange(LEnd, RBegin); + assert((Range - 2ULL).isNonNegative() && + "Invalid case distance"); + double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble(); + double Metric = Range.logBase2()*(LDensity+RDensity); + // Should always split in some non-trivial place + DEBUG(errs() <<"=>Step\n" + << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' + << "LDensity: " << LDensity + << ", RDensity: " << RDensity << '\n' + << "Metric: " << Metric << '\n'); + if (FMetric < Metric) { + Pivot = J; + FMetric = Metric; + DEBUG(errs() << "Current metric set to: " << FMetric << '\n'); + } + + LSize += J->size(); + RSize -= J->size(); + } + if (areJTsAllowed(TLI)) { + // If our case is dense we *really* should handle it earlier! + assert((FMetric > 0) && "Should handle dense range earlier!"); + } else { + Pivot = CR.Range.first + Size/2; + } + + CaseRange LHSR(CR.Range.first, Pivot); + CaseRange RHSR(Pivot, CR.Range.second); + Constant *C = Pivot->Low; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + + // We know that we branch to the LHS if the Value being switched on is + // less than the Pivot value, C. We use this to optimize our binary + // tree a bit, by recognizing that if SV is greater than or equal to the + // LHS's Case Value, and that Case Value is exactly one less than the + // Pivot's Value, then we can branch directly to the LHS's Target, + // rather than creating a leaf node for it. + if ((LHSR.second - LHSR.first) == 1 && + LHSR.first->High == CR.GE && + cast<ConstantInt>(C)->getValue() == + (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + TrueBB = LHSR.first->BB; + } else { + TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, TrueBB); + WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Similar to the optimization above, if the Value being switched on is + // known to be less than the Constant CR.LT, and the current Case Value + // is CR.LT - 1, then we can branch directly to the target block for + // the current Case Value, rather than emitting a RHS leaf node for it. + if ((RHSR.second - RHSR.first) == 1 && CR.LT && + cast<ConstantInt>(RHSR.first->Low)->getValue() == + (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + FalseBB = RHSR.first->BB; + } else { + FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, FalseBB); + WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + // Create a CaseBlock record representing a conditional branch to + // the LHS node if the value being switched on SV is less than C. + // Otherwise, branch to LHS. 
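The pivot loop above scores every split point as the log2 of the gap at the split times the sum of the left and right densities, and keeps the best one. The standalone sketch below, with made-up case values and floating-point log2 standing in for APInt::logBase2, shows the metric picking the split at the large gap; it is illustrative only and not from the commit:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical single-value cases, already sorted.
      std::vector<long long> values = {0, 1, 2, 3, 100, 101, 102, 103};
      size_t n = values.size(), bestIdx = n / 2;
      double bestMetric = 0;
      for (size_t j = 1; j < n; ++j) {                     // split before values[j]
        double gap    = double(values[j] - values[j - 1] + 1);
        double lDens  = double(j)     / double(values[j - 1] - values.front() + 1);
        double rDens  = double(n - j) / double(values.back() - values[j] + 1);
        double metric = std::log2(gap) * (lDens + rDens);
        if (metric > bestMetric) { bestMetric = metric; bestIdx = j; }
      }
      std::printf("best split before index %zu (value %lld)\n", bestIdx, values[bestIdx]);
      // Expect the split at the large gap between 3 and 100, i.e. before value 100.
      return 0;
    }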
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + + if (CR.CaseBB == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + return true; +} + +/// handleBitTestsSwitchCase - if current case range has few destination and +/// range span less, than machine word bitwidth, encode case range into series +/// of masks and emit bit tests with these masks. +bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default){ + unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits(); + + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = CurMBB->getParent(); + + // If target does not have legal shift left, do not emit bit tests at all. + if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + return false; + + size_t numCmps = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) { + // Single case counts one, case range - two. + numCmps += (I->Low == I->High ? 1 : 2); + } + + // Count unique destinations + SmallSet<MachineBasicBlock*, 4> Dests; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + Dests.insert(I->BB); + if (Dests.size() > 3) + // Don't bother the code below, if there are too much unique destinations + return false; + } + DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n' + << "Total number of comparisons: " << numCmps << '\n'); + + // Compute span of values. + const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); + APInt cmpRange = maxValue - minValue; + + DEBUG(errs() << "Compare range: " << cmpRange << '\n' + << "Low bound: " << minValue << '\n' + << "High bound: " << maxValue << '\n'); + + if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) || + (!(Dests.size() == 1 && numCmps >= 3) && + !(Dests.size() == 2 && numCmps >= 5) && + !(Dests.size() >= 3 && numCmps >= 6))) + return false; + + DEBUG(errs() << "Emitting bit tests\n"); + APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); + + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + if (minValue.isNonNegative() && + maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) { + cmpRange = maxValue; + } else { + lowBound = minValue; + } + + CaseBitsVector CasesBits; + unsigned i, count = 0; + + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + MachineBasicBlock* Dest = I->BB; + for (i = 0; i < count; ++i) + if (Dest == CasesBits[i].BB) + break; + + if (i == count) { + assert((count < 3) && "Too much destinations to test!"); + CasesBits.push_back(CaseBits(0, Dest, 0)); + count++; + } + + const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); + const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); + + uint64_t lo = (lowValue - lowBound).getZExtValue(); + uint64_t hi = (highValue - lowBound).getZExtValue(); + + for (uint64_t j = lo; j <= hi; j++) { + CasesBits[i].Mask |= 1ULL << j; + CasesBits[i].Bits++; + } + + } + std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); + + BitTestInfo BTC; + + // Figure out which block is immediately after the current one. 
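The loop just above packs every case value that shares a destination into a word-sized bit mask, so each destination later costs one shift, one AND and one conditional branch. A standalone sketch of the resulting dispatch, with hypothetical case values and masks, not taken from this patch:

    #include <cassert>
    #include <cstdint>

    static int dispatch(unsigned x) {
      const unsigned First = 1, Range = 8;                 // case values span 1..9
      // Destination A handles {1,3,5,9}, destination B handles {2,4,8};
      // masks are built over (value - First), mirroring CasesBits[i].Mask above.
      const uint64_t MaskA = 1ULL << 0 | 1ULL << 2 | 1ULL << 4 | 1ULL << 8;
      const uint64_t MaskB = 1ULL << 1 | 1ULL << 3 | 1ULL << 7;

      unsigned idx = x - First;                            // subtract the minimum value
      if (idx > Range) return 0;                           // range check -> default
      uint64_t bit = 1ULL << idx;                          // SHL of 1 by the adjusted value
      if (bit & MaskA) return 1;                           // AND + SETNE + BRCOND
      if (bit & MaskB) return 2;
      return 0;                                            // default
    }

    int main() {
      assert(dispatch(3) == 1 && dispatch(8) == 2 && dispatch(6) == 0 && dispatch(42) == 0);
      return 0;
    }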
+ MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + + DEBUG(errs() << "Cases:\n"); + for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { + DEBUG(errs() << "Mask: " << CasesBits[i].Mask + << ", Bits: " << CasesBits[i].Bits + << ", BB: " << CasesBits[i].BB << '\n'); + + MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, CaseBB); + BTC.push_back(BitTestCase(CasesBits[i].Mask, + CaseBB, + CasesBits[i].BB)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } + + BitTestBlock BTB(lowBound, cmpRange, SV, + -1U, (CR.CaseBB == CurMBB), + CR.CaseBB, Default, BTC); + + if (CR.CaseBB == CurMBB) + visitBitTestHeader(BTB); + + BitTestCases.push_back(BTB); + + return true; +} + + +/// Clusterify - Transform simple list of Cases into list of CaseRange's +size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { + size_t numCmps = 0; + + // Start with "simple" cases + for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + Cases.push_back(Case(SI.getSuccessorValue(i), + SI.getSuccessorValue(i), + SMBB)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; + } + + return numCmps; +} + +void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CurMBB; + + MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // If there is only the default destination, branch to it if it is not the + // next basic block. Otherwise, just fall through. + if (SI.getNumOperands() == 2) { + // Update machine-CFG edges. + + // If this is not a fall-through branch, emit the branch. + CurMBB->addSuccessor(Default); + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); + return; + } + + // If there are any non-default case statements, create a vector of Cases + // representing each one, and sort the vector so that we can efficiently + // create a binary search tree from them. + CaseVector Cases; + size_t numCmps = Clusterify(Cases, SI); + DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'); + numCmps = 0; + + // Get the Value to be switched on and default basic blocks, which will be + // inserted into CaseBlock records, representing basic blocks in the binary + // search tree. 
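Clusterify above first emits one Case per successor value, sorts them, and then merges neighbours that are contiguous and share a destination, so that a later range costs only two compares. A standalone sketch of that merging step, with hypothetical values and not part of the commit:

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Cluster { long long low, high; int dest; };

    int main() {
      // Hypothetical (value, destination) pairs, already sorted by value.
      std::vector<std::pair<long long, int>> cases =
          {{1, 0}, {2, 0}, {3, 0}, {7, 1}, {8, 1}, {20, 0}};
      std::vector<Cluster> clusters;
      for (auto &c : cases) {
        if (!clusters.empty() && clusters.back().dest == c.second &&
            clusters.back().high + 1 == c.first)
          clusters.back().high = c.first;          // merge into the previous range
        else
          clusters.push_back({c.first, c.first, c.second});
      }
      // Expected output, one cluster per line:
      //   [1, 3] -> dest 0   [7, 8] -> dest 1   [20, 20] -> dest 0
      for (auto &cl : clusters)
        std::printf("[%lld, %lld] -> dest %d\n", cl.low, cl.high, cl.dest);
      return 0;
    }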
+ Value *SV = SI.getOperand(0); + + // Push the initial CaseRec onto the worklist + CaseRecVector WorkList; + WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); + + while (!WorkList.empty()) { + // Grab a record representing a case range to process off the worklist + CaseRec CR = WorkList.back(); + WorkList.pop_back(); + + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) + continue; + + // If the range has few cases (two or less) emit a series of specific + // tests. + if (handleSmallSwitchRange(CR, WorkList, SV, Default)) + continue; + + // If the switch has more than 5 blocks, and at least 40% dense, and the + // target supports indirect branches, then emit a jump table rather than + // lowering the switch to a binary tree of conditional branches. + if (handleJTSwitchCase(CR, WorkList, SV, Default)) + continue; + + // Emit binary tree. We need to pick a pivot, and push left and right ranges + // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. + handleBTSplitSwitchCase(CR, WorkList, SV, Default); + } +} + + +void SelectionDAGLowering::visitSub(User &I) { + // -0.0 - X --> fneg + const Type *Ty = I.getType(); + if (isa<VectorType>(Ty)) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + const Type *ElTy = DestTy->getElementType(); + if (ElTy->isFloatingPoint()) { + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + } + } + if (Ty->isFloatingPoint()) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + + visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB); +} + +void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + if (!isa<VectorType>(I.getType()) && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. + if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (TLI.getShiftAmountTy().getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. 
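The special case at the top of visitSub above rewrites (-0.0 - X) into an FNEG. The reason only negative zero qualifies is visible in a two-line standalone experiment, not from the patch: subtracting from -0.0 is exact IEEE negation even for X equal to +0.0, while 0.0 - X would lose the sign there.

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 0.0;
      std::printf("%g %g %g\n", -0.0 - x, 0.0 - x, -x);                       // -0 0 -0
      std::printf("%d %d\n", std::signbit(-0.0 - x), std::signbit(0.0 - x));  // 1 0
      return 0;
    }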
+ else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + } + + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} + +void SelectionDAGLowering::visitICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitVICmp(User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (VICmpInst *IC = dyn_cast<VICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), + Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitVFCmp(User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + MVT DestVT = TLI.getValueType(I.getType()); + + setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGLowering::visitSelect(User &I) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues != 0) { + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), + TrueVal.getValueType(), Cond, + SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); + } +} + + +void SelectionDAGLowering::visitTrunc(User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
+ SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitZExt(User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitSExt(User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPTrunc(User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, DAG.getIntPtrConstant(0))); +} + +void SelectionDAGLowering::visitFPExt(User &I){ + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPToUI(User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitFPToSI(User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitUIToFP(User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitSIToFP(User &I){ + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} + +void SelectionDAGLowering::visitPtrToInt(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + MVT SrcVT = N.getValueType(); + MVT DestVT = TLI.getValueType(I.getType()); + SDValue Result; + if (DestVT.bitsLT(SrcVT)) + Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); + setValue(&I, Result); +} + +void SelectionDAGLowering::visitIntToPtr(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. 
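visitPtrToInt above, and visitIntToPtr just below, pick between TRUNCATE and ZERO_EXTEND purely from the relative sizes; the conversion is never sign-extending. A minimal standalone illustration, not taken from this commit:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t wide = 0xFFFFFFFF80000000ULL;
      uint32_t narrowed = uint32_t(wide);                   // TRUNCATE
      uint64_t widened  = uint64_t(uint32_t(0x80000000u));  // ZERO_EXTEND, not sign-extend
      std::printf("%#x %#llx\n", (unsigned)narrowed, (unsigned long long)widened);
      // Prints: 0x80000000 0x80000000
      return 0;
    }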
+ SDValue N = getValue(I.getOperand(0)); + MVT SrcVT = N.getValueType(); + MVT DestVT = TLI.getValueType(I.getType()); + if (DestVT.bitsLT(SrcVT)) + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + DestVT, N)); +} + +void SelectionDAGLowering::visitBitCast(User &I) { + SDValue N = getValue(I.getOperand(0)); + MVT DestVT = TLI.getValueType(I.getType()); + + // BitCast assures us that source and destination are the same size so this + // is either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGLowering::visitInsertElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(2))); + + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGLowering::visitExtractElement(User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); +} + + +// Utility for visitShuffleVector - Returns true if the mask is mask starting +// from SIndx and increasing to the element length (undefs are allowed). +static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { + unsigned MaskNumElts = Mask.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) + if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) + return false; + return true; +} + +void SelectionDAGLowering::visitShuffleVector(User &I) { + SmallVector<int, 8> Mask; + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); + + // Convert the ConstantVector mask operand into an array of ints, with -1 + // representing undef values. + SmallVector<Constant*, 8> MaskElts; + cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); + unsigned MaskNumElts = MaskElts.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (isa<UndefValue>(MaskElts[i])) + Mask.push_back(-1); + else + Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); + } + + MVT VT = TLI.getValueType(I.getType()); + MVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); + return; + } + + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { + // Mask is longer than the source vectors and is a multiple of the source + // vectors. We can use concatenate vector to make the mask and vectors + // lengths match. + if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + + // Pad both vectors with undefs to make them the same length as the mask. 
+ unsigned NumConcat = MaskNumElts / SrcNumElts; + bool Src1U = Src1.getOpcode() == ISD::UNDEF; + bool Src2U = Src2.getOpcode() == ISD::UNDEF; + SDValue UndefVal = DAG.getUNDEF(SrcVT); + + SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); + SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); + MOps1[0] = Src1; + MOps2[0] = Src2; + + Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps1[0], NumConcat); + Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps2[0], NumConcat); + + // Readjust mask for new input vector length. + SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx); + else + MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + } + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + + if (SrcNumElts > MaskNumElts) { + // Analyze the access pattern of the vector to see if we can extract + // two subvectors and do the shuffle. The analysis is done by calculating + // the range of elements the mask access on both vectors. + int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; + int MaxRange[2] = {-1, -1}; + + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + int Input = 0; + if (Idx < 0) + continue; + + if (Idx >= (int)SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; + } + + // Check if the access is smaller than the vector size and can we find + // a reasonable extract index. + int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not Extract. + int StartIdx[2]; // StartIdx to extract from + for (int Input=0; Input < 2; ++Input) { + if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + RangeUse[Input] = 0; // Unused + StartIdx[Input] = 0; + } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { + // Fits within range but we should see if we can find a good + // start index that is a multiple of the mask length. + if (MaxRange[Input] < (int)MaskNumElts) { + RangeUse[Input] = 1; // Extract from beginning of the vector + StartIdx[Input] = 0; + } else { + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts < SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. + } + } + } + + if (RangeUse[0] == 0 && RangeUse[0] == 0) { + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. + return; + } + else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + // Extract appropriate subvector and generate a vector shuffle + for (int Input=0; Input < 2; ++Input) { + SDValue& Src = Input == 0 ? Src1 : Src2; + if (RangeUse[Input] == 0) { + Src = DAG.getUNDEF(VT); + } else { + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, + Src, DAG.getIntPtrConstant(StartIdx[Input])); + } + } + // Calculate new mask. 
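When the mask is longer than the sources, the code above widens both sources with CONCAT_VECTORS and then rewrites mask entries that referred to the second source. The standalone sketch below reproduces just that index rewrite for a hypothetical 2-element source and 4-element mask; it is illustrative only and not part of the patch:

    #include <cstdio>
    #include <vector>

    int main() {
      const int SrcNumElts = 2, MaskNumElts = 4;
      std::vector<int> mask = {0, 2, 1, 3};        // -1 would mean undef and stays as-is
      std::vector<int> mapped;
      for (int idx : mask) {
        if (idx < SrcNumElts)
          mapped.push_back(idx);                            // still indexes Src1
        else
          mapped.push_back(idx + MaskNumElts - SrcNumElts); // now indexes the widened Src2
      }
      for (int idx : mapped) std::printf("%d ", idx);       // 0 4 1 5
      std::printf("\n");
      return 0;
    }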
+ SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + MappedOps.push_back(Idx); + else if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx - StartIdx[0]); + else + MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + } + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + } + + // We can't use either concat vectors or extract subvectors so fall back to + // replacing the shuffle with extract and build vector. + // to insert and build vector. + MVT EltVT = VT.getVectorElementType(); + MVT PtrVT = TLI.getPointerTy(); + SmallVector<SDValue,8> Ops; + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + } else { + int Idx = Mask[i]; + if (Idx < (int)SrcNumElts) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src1, DAG.getConstant(Idx, PtrVT))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src2, + DAG.getConstant(Idx - SrcNumElts, PtrVT))); + } + } + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); +} + +void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + const Type *AggTy = I.getType(); + const Type *ValTy = Op1->getType(); + bool IntoUndef = isa<UndefValue>(Op0); + bool FromUndef = isa<UndefValue>(Op1); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<MVT, 4> AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + SmallVector<MVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumAggValues); + + SDValue Agg = getValue(Op0); + SDValue Val = getValue(Op1); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); +} + +void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Type *AggTy = Op0->getType(); + const Type *ValTy = I.getType(); + bool OutOfUndef = isa<UndefValue>(Op0); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector<MVT, 4> ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? 
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); +} + + +void SelectionDAGLowering::visitGetElementPtr(User &I) { + SDValue N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + Value *Idx = *OI; + if (const StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + DAG.getIntPtrConstant(Offset)); + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + SDValue OffsVal; + unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + if (PtrBits < 64) { + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(Offs, MVT::i64)); + } else + OffsVal = DAG.getIntPtrConstant(Offs); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + OffsVal); + continue; + } + + // N = N + Idx * ElementSize; + uint64_t ElementSize = TD->getTypeAllocSize(Ty); + SDValue IdxN = getValue(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + if (IdxN.getValueType().bitsLT(N.getValueType())) + IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), + N.getValueType(), IdxN); + else if (IdxN.getValueType().bitsGT(N.getValueType())) + IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + N.getValueType(), IdxN); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (isPowerOf2_64(ElementSize)) { + unsigned Amt = Log2_64(ElementSize); + IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + N.getValueType(), IdxN, + DAG.getConstant(Amt, TLI.getPointerTy())); + } else { + SDValue Scale = DAG.getIntPtrConstant(ElementSize); + IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N.getValueType(), N, IdxN); + } + } + setValue(&I, N); +} + +void SelectionDAGLowering::visitAlloca(AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. + if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. 
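The address arithmetic in visitGetElementPtr above boils down to a running byte offset: constant struct-field offsets are folded in directly, and array indices are scaled by the element's allocation size, emitted as a shift when that size is a power of two. A scalar model (illustrative; the struct, its layout, and the offset/size value 8 are assumptions):

  #include <cstdint>

  // Models lowering of  getelementptr %struct.S* %p, 0, 1, %i  for a hypothetical
  //   struct S { int32_t a; double b[4]; };
  // assuming field 'b' sits at byte offset 8 and the element size of double is 8.
  static uint64_t gepAddress(uint64_t Base, int64_t i) {
    uint64_t Addr = Base;
    Addr += 8;                   // constant field offset from the struct layout
    Addr += (uint64_t)i << 3;    // i * 8, emitted as SHL because 8 is a power of two
    return Addr;
  }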
+ + const Type *Ty = I.getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), + AllocSize, + DAG.getConstant(TySize, AllocSize.getValueType())); + + + + MVT IntPtr = TLI.getPointerTy(); + if (IntPtr.bitsLT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + IntPtr, AllocSize); + else if (IntPtr.bitsGT(AllocSize.getValueType())) + AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + IntPtr, AllocSize); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + unsigned StackAlign = + TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. + AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign-1)); + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + VTs, Ops, 3); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + // Inform the Frame Information that we have just allocated a variable-sized + // object. + CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); +} + +void SelectionDAGLowering::visitLoad(LoadInst &I) { + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (I.isVolatile()) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA->pointsToConstantMemory(SV)) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. 
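The ADD/AND pair at the end of visitAlloca above is the standard round-up-to-alignment idiom. A scalar equivalent (illustrative only; Align must be a power of two):

  #include <cstdint>

  static uint64_t roundUpToAlign(uint64_t Size, uint64_t Align) {
    return (Size + Align - 1) & ~(Align - 1);
  }

  // roundUpToAlign(20, 16) == 32;  roundUpToAlign(32, 16) == 32.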
+ Root = DAG.getRoot(); + } + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], + isVolatile, Alignment); + Values[i] = L; + Chains[i] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, + &Chains[0], NumValues); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + + +void SelectionDAGLowering::visitStore(StoreInst &I) { + Value *SrcV = I.getOperand(0); + Value *PtrV = I.getOperand(1); + + SmallVector<MVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(NumValues); + MVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + unsigned Alignment = I.getAlignment(); + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + PtrV, Offsets[i], + isVolatile, Alignment); + + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector<SDValue, 8> Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic) + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. 
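visitLoad and visitStore above split an aggregate access into one memory operation per first-class member, at the byte offsets reported by ComputeValueVTs, and then tie the per-member chains back together with a single TokenFactor. For a hypothetical two-member aggregate the offsets work out as below (the C struct and its layout are an assumption used only for illustration):

  #include <cstddef>
  #include <cstdint>

  struct Pair { int32_t a; float b; };   // stands in for an IR aggregate {i32, float}
  // With the usual 4-byte layout the lowering above would emit one load/store at
  // Ptr+0 (i32) and one at Ptr+4 (f32), then merge the two chains with TokenFactor.
  static_assert(offsetof(Pair, b) == 4, "illustrative layout assumption");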
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDValue Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + std::vector<MVT> VTArray; + if (I.getType() != Type::VoidTy) { + MVT VT = TLI.getValueType(I.getType()); + if (VT.isVector()) { + const VectorType *DestTy = cast<VectorType>(I.getType()); + MVT EltVT = TLI.getValueType(DestTy->getElementType()); + + VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); + assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); + } + + assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); + VTArray.push_back(VT); + } + if (HasChain) + VTArray.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + VTs, &Ops[0], Ops.size(), + Info.memVT, Info.ptrVal, Info.offset, + Info.align, Info.vol, + Info.readMem, Info.writeMem); + } + else if (!HasChain) + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else if (I.getType() != Type::VoidTy) + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + else + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + if (I.getType() != Type::VoidTy) { + if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + MVT VT = TLI.getValueType(PTy); + Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + } + setValue(&I, Result); + } +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +static GlobalVariable *ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + assert ((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +namespace llvm { + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + assert(CE->getOpcode() == Instruction::BitCast && + isa<Function>(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. + std::vector<GlobalVariable *> TyInfo; + unsigned N = I.getNumOperands(); + + for (unsigned i = N - 1; i > 2; --i) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert (FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. 
+ TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addFilterTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + N = i; + } + } + + if (N > 3) { + TyInfo.reserve(N - 3); + for (unsigned j = 3; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + } +} + +} + +/// GetSignificand - Get the significand and build it into a floating-point +/// number with exponent of 1: +/// +/// Op = (Op & 0x007fffff) | 0x3f800000; +/// +/// where Op is the hexidecimal representation of floating point value. +static SDValue +GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { + SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x007fffff, MVT::i32)); + SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, + DAG.getConstant(0x3f800000, MVT::i32)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); +} + +/// GetExponent - Get the exponent: +/// +/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); +/// +/// where Op is the hexidecimal representation of floating point value. +static SDValue +GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, + DebugLoc dl) { + SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x7f800000, MVT::i32)); + SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, TLI.getPointerTy())); + SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, + DAG.getConstant(127, MVT::i32)); + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +} + +/// getF32Constant - Get 32-bit floating point constant. +static SDValue +getF32Constant(SelectionDAG &DAG, unsigned Flt) { + return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); +} + +/// Inlined utility function to implement binary input atomic intrinsics for +/// visitIntrinsicCall: I is a call instruction +/// Op is the associated NodeType for I +const char * +SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(Op, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; +} + +// implVisitAluOverflow - Lower arithmetic overflow instrinsics. +const char * +SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2); + + setValue(&I, Result); + return 0; +} + +/// visitExp - Lower an exp intrinsic. Handles the special sequences for +/// limited-precision mode. 
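GetSignificand and GetExponent above work directly on the IEEE-754 bit pattern of an f32. The same arithmetic in plain C++ (illustrative sketch for normal, positive inputs; the helper names are made up):

  #include <cstdint>
  #include <cstring>

  static float significandOf(float f) {          // (bits & 0x007fffff) | 0x3f800000
    uint32_t Bits; std::memcpy(&Bits, &f, 4);
    Bits = (Bits & 0x007fffffu) | 0x3f800000u;   // keep the mantissa, force a zero exponent
    std::memcpy(&f, &Bits, 4);
    return f;                                    // result lies in [1.0, 2.0)
  }

  static float exponentOf(float f) {             // (float)(int)(((bits & 0x7f800000) >> 23) - 127)
    uint32_t Bits; std::memcpy(&Bits, &f, 4);
    return (float)((int)((Bits & 0x7f800000u) >> 23) - 127);
  }

  // For f = 12.0f:  exponentOf(f) == 3.0f  and  significandOf(f) == 1.5f  (12 = 1.5 * 2^3).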
+void +SelectionDAGLowering::visitExp(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + + // Add the exponent into the result in integer domain. + SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. 
+ SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. 
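The limited-precision path in visitExp above computes exp(x) as 2^(x * log2 e): the product is split into integer and fractional parts, the fraction is fed to a small minimax polynomial for 2^f, and the integer part is added straight into the exponent field of the result. A scalar sketch of the 6-bit variant, reusing the constants quoted in the comments (illustrative only, not a libm replacement; assumes the result stays in the normal float range):

  #include <cstdint>
  #include <cstring>

  static float exp6bit(float x) {
    float t = x * 1.4426950f;                    // x * log2(e)
    int IntPart = (int)t;                        // truncation, like FP_TO_SINT
    float f = t - (float)IntPart;                // fractional part
    // degree-2 minimax polynomial for 2^f, same coefficients as the 6-bit case above
    float p = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;
    uint32_t Bits; std::memcpy(&Bits, &p, 4);
    Bits += (uint32_t)IntPart << 23;             // fold IntPart into the exponent field
    std::memcpy(&p, &Bits, 4);
    return p;
  }

  // exp6bit(1.0f) lands within a couple of percent of e, matching the quoted 6-bit error bound.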
+ SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, LogOfMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. 
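visitLog above mirrors that decomposition for log(x): the unbiased exponent contributes exponent * ln 2, and a short polynomial evaluated on the significand (a value in [1, 2)) supplies the rest. A scalar sketch of the 6-bit variant with the same coefficients (illustrative only; valid for normal, positive inputs):

  #include <cstdint>
  #include <cstring>

  static float log6bit(float x) {
    uint32_t Bits; std::memcpy(&Bits, &x, 4);
    float e = (float)((int)((Bits >> 23) & 0xffu) - 127);    // unbiased exponent
    uint32_t MBits = (Bits & 0x007fffffu) | 0x3f800000u;
    float m; std::memcpy(&m, &MBits, 4);                     // significand in [1, 2)
    float LogOfExponent = e * 0.69314718f;                   // exponent * ln(2)
    // degree-2 minimax polynomial for ln(m), same coefficients as the 6-bit case above
    float LogOfMantissa = (1.4034025f - 0.23903021f * m) * m - 1.1609546f;
    return LogOfExponent + LogOfMantissa;
  }

  // log6bit(8.0f) comes out close to 3 * ln 2 ~= 2.079, within the quoted 0.0034 error bound.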
+void +SelectionDAGLowering::visitLog2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad)); + SDValue t10 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log2ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitLog10(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192)); 
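visitLog2 and visitLog10 above follow the same shape as visitLog; only the scale applied to the exponent changes (no scale for log2, log10(2) for log10, which is what the 0x3e9a209a constant [0.30102999f] encodes) and the mantissa polynomial is refit for the new base. A quick scalar check of that scale relationship (illustrative):

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 12.0f;
    // log10(x) == log2(x) * log10(2); the lowering bakes this factor into the exponent term.
    std::printf("%f %f\n", std::log10(x), std::log2(x) * 0.30102999f);   // both ~1.079181
    return 0;
  }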
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); + + result = DAG.getNode(ISD::FADD, dl, + MVT::f32, LogOfExponent, Log10ofMantissa); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FLOG10, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGLowering::visitExp2(CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(1)); + + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is 
better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP2, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1))); + } + + setValue(&I, result); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. +void +SelectionDAGLowering::visitPow(CallInst &I) { + SDValue result; + Value *Val = I.getOperand(1); + DebugLoc dl = getCurDebugLoc(); + bool IsExp10 = false; + + if (getValue(Val).getValueType() == MVT::f32 && + getValue(I.getOperand(2)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + APFloat Ten(10.0f); + IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); + } + } + } + + if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getOperand(2)); + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // IntegerPartOfX = (int32_t)(x * LOG2OF10); + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x40549a78)); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = x - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, TLI.getPointerTy())); + + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // twoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t6, 
IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue TwoToFractionalPartOfX = + DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); + + result = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f32, TwoToFractionalPartOfX); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FPOW, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2))); + } + + setValue(&I, result); +} + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { + DebugLoc dl = getCurDebugLoc(); + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
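The special case in visitPow above only fires when the base is the literal constant 10.0f; it rewrites pow(10, x) as 2^(x * log2 10), with LOG2OF10 = 3.3219281f (0x40549a78), and then reuses the same 2^fraction polynomials as visitExp and visitExp2. A quick scalar check of the identity it relies on (illustrative):

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 1.5f;
    std::printf("%f %f\n", std::pow(10.0f, x), std::exp2(x * 3.3219281f));  // both ~31.6228
    return 0;
  }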
+ visitTargetIntrinsic(I, Intrinsic); + return 0; + case Intrinsic::vastart: visitVAStart(I); return 0; + case Intrinsic::vaend: visitVAEnd(I); return 0; + case Intrinsic::vacopy: visitVACopy(I); return 0; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::setjmp: + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + break; + case Intrinsic::longjmp: + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + break; + case Intrinsic::memcpy: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::memset: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); + return 0; + } + case Intrinsic::memmove: { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op3 = getValue(I.getOperand(3)); + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) + Size = C->getZExtValue(); + if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + case Intrinsic::dbg_stoppoint: { + DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); + if (DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLevel)) { + MachineFunction &MF = DAG.getMachineFunction(); + DICompileUnit CU(cast<GlobalVariable>(SPI.getContext())); + DebugLoc Loc = DebugLoc::get(MF.getOrCreateDebugLocID(CU.getGV(), + SPI.getLine(), SPI.getColumn())); + setCurDebugLoc(Loc); + + if (OptLevel == CodeGenOpt::None) + DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), + SPI.getLine(), + SPI.getColumn(), + SPI.getContext())); + } + return 0; + } + case Intrinsic::dbg_region_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); + + if (DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + unsigned LabelID = + DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + + return 0; + } + case Intrinsic::dbg_region_end: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); + + if (DIDescriptor::ValidDebugInfo(REI.getContext(), OptLevel) && + DW && DW->ShouldEmitDwarfDebug()) { + MachineFunction &MF = DAG.getMachineFunction(); + DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + + if (Subprogram.isNull() || 
Subprogram.describes(MF.getFunction())) { + unsigned LabelID = + DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext())); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } else { + // This is end of inlined function. Debugging information for inlined + // function is not handled yet (only supported by FastISel). + if (OptLevel == CodeGenOpt::None) { + unsigned ID = DW->RecordInlinedFnEnd(Subprogram); + if (ID != 0) + // Returned ID is 0 if this is unbalanced "end of inlined + // scope". This could happen if optimizer eats dbg intrinsics or + // "beginning of inlined scope" is not recoginized due to missing + // location info. In such cases, do ignore this region.end. + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), ID)); + } + } + } + + return 0; + } + case Intrinsic::dbg_func_start: { + DwarfWriter *DW = DAG.getDwarfWriter(); + DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); + Value *SP = FSI.getSubprogram(); + if (!DIDescriptor::ValidDebugInfo(SP, OptLevel)) + return 0; + + MachineFunction &MF = DAG.getMachineFunction(); + if (OptLevel == CodeGenOpt::None) { + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is what + // (most?) gdb expects. + DebugLoc PrevLoc = CurDebugLoc; + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + if (!Subprogram.describes(MF.getFunction())) { + // This is a beginning of an inlined function. + + // If llvm.dbg.func.start is seen in a new block before any + // llvm.dbg.stoppoint intrinsic then the location info is unknown. + // FIXME : Why DebugLoc is reset at the beginning of each block ? + if (PrevLoc.isUnknown()) + return 0; + + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + + if (DW && DW->ShouldEmitDwarfDebug()) { + DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); + unsigned LabelID = DW->RecordInlinedFnStart(Subprogram, + DICompileUnit(PrevLocTpl.CompileUnit), + PrevLocTpl.Line, + PrevLocTpl.Col); + DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), + getRoot(), LabelID)); + } + } else { + // Record the source line. + unsigned Line = Subprogram.getLineNumber(); + MF.setDefaultDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + if (DW && DW->ShouldEmitDwarfDebug()) { + // llvm.dbg.func_start also defines beginning of function scope. + DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); + } + } + } else { + DISubprogram Subprogram(cast<GlobalVariable>(SP)); + + std::string SPName; + Subprogram.getLinkageName(SPName); + if (!SPName.empty() + && strcmp(SPName.c_str(), MF.getFunction()->getNameStart())) { + // This is beginning of inlined function. Debugging information for + // inlined function is not handled yet (only supported by FastISel). + return 0; + } + + // llvm.dbg.func.start implicitly defines a dbg_stoppoint which is + // what (most?) gdb expects. + DICompileUnit CompileUnit = Subprogram.getCompileUnit(); + + // Record the source line but does not create a label for the normal + // function start. It will be emitted at asm emission time. However, + // create a label if this is a beginning of inlined function. 
+ unsigned Line = Subprogram.getLineNumber(); + setCurDebugLoc(DebugLoc::get( + MF.getOrCreateDebugLocID(CompileUnit.getGV(), Line, 0))); + // FIXME - Start new region because llvm.dbg.func_start also defines + // beginning of function scope. + } + + return 0; + } + case Intrinsic::dbg_declare: { + if (OptLevel == CodeGenOpt::None) { + DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + Value *Variable = DI.getVariable(); + if (DIDescriptor::ValidDebugInfo(Variable, OptLevel)) + DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), + getValue(DI.getAddress()), getValue(Variable))); + } else { + // FIXME: Do something sensible here when we support debug declare. + } + return 0; + } + case Intrinsic::eh_exception: { + // Insert the EXCEPTIONADDR instruction. + assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[1]; + Ops[0] = DAG.getRoot(); + SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + return 0; + } + + case Intrinsic::eh_selector_i32: + case Intrinsic::eh_selector_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { +#ifndef NDEBUG + FuncInfo.CatchInfoLost.insert(&I); +#endif + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); + } + + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(VT, MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + setValue(&I, Op); + DAG.setRoot(Op.getValue(1)); + } else { + setValue(&I, DAG.getConstant(0, VT)); + } + + return 0; + } + + case Intrinsic::eh_typeid_for_i32: + case Intrinsic::eh_typeid_for_i64: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? + MVT::i32 : MVT::i64); + + if (MMI) { + // Find the type id for the given typeinfo. + GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + + unsigned TypeID = MMI->getTypeIDFor(GV); + setValue(&I, DAG.getConstant(TypeID, VT)); + } else { + // Return something different to eh_selector. 
+ setValue(&I, DAG.getConstant(1, VT)); + } + + return 0; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + } else { + setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); + } + + return 0; + case Intrinsic::eh_unwind_init: + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { + MMI->setCallsUnwindInit(true); + } + + return 0; + + case Intrinsic::eh_dwarf_cfa: { + MVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg; + if (VT.bitsGT(TLI.getPointerTy())) + CfaArg = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + else + CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, + TLI.getPointerTy(), getValue(I.getOperand(1))); + + SDValue Offset = DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + TLI.getPointerTy()), + CfaArg); + setValue(&I, DAG.getNode(ISD::ADD, dl, + TLI.getPointerTy(), + DAG.getNode(ISD::FRAMEADDR, dl, + TLI.getPointerTy(), + DAG.getConstant(0, + TLI.getPointerTy())), + Offset)); + return 0; + } + + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + MVT DestVT = TLI.getValueType(I.getType()); + Value* Op1 = I.getOperand(1); + setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + Code)); + return 0; + } + + case Intrinsic::sqrt: + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::powi: + setValue(&I, DAG.getNode(ISD::FPOWI, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); + return 0; + case Intrinsic::sin: + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cos: + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::log: + visitLog(I); + return 0; + case Intrinsic::log2: + visitLog2(I); + return 0; + case Intrinsic::log10: + visitLog10(I); + return 0; + case Intrinsic::exp: + visitExp(I); + return 0; + case Intrinsic::exp2: + visitExp2(I); + return 0; + case Intrinsic::pow: + visitPow(I); + return 0; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case 
Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl, + DAG.getVTList(MVT::i64, MVT::Other), + &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::part_select: { + // Currently not implemented: just abort + assert(0 && "part_select intrinsic not implemented"); + abort(); + } + case Intrinsic::part_set: { + // Currently not implemented: just abort + assert(0 && "part_set intrinsic not implemented"); + abort(); + } + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); + return 0; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getOperand(1)); + MVT Ty = Arg.getValueType(); + SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); + setValue(&I, result); + return 0; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + case Intrinsic::stackrestore: { + SDValue Tmp = getValue(I.getOperand(1)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp)); + return 0; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MVT PtrTy = TLI.getPointerTy(); + + SDValue Src = getValue(I.getOperand(1)); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. 
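// The store built just below places the guard value into the dedicated stack
// slot recorded via setStackProtectorIndex; the function epilogue later
// compares that slot against the guard and traps on mismatch. A standalone
// sketch of that protocol (the guard constant and the fail handler are
// illustrative only; real targets load the guard from TLS or libc):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static uint64_t __guard = 0x595e9fbd94fda766ull;   // illustrative canary value

static void protector_fail() {
  std::fprintf(stderr, "stack smashing detected\n");
  std::abort();
}

static int protected_fn(int x) {
  uint64_t slot = __guard;          // prologue: store guard into the stack slot
  int local = x * 2;                // ... body that might overflow 'local' ...
  if (slot != __guard)              // epilogue: compare slot against the guard
    protector_fail();
  return local;
}

int main() { std::printf("%d\n", protected_fn(21)); return 0; }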
+ SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + PseudoSourceValue::getFixedStack(FI), + 0, true); + setValue(&I, Result); + DAG.setRoot(Result); + return 0; + } + case Intrinsic::var_annotation: + // Discard annotate attributes + return 0; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[5] = DAG.getSrcValue(F); + + SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl, + DAG.getVTList(TLI.getPointerTy(), MVT::Other), + Ops, 6); + + setValue(&I, Tmp); + DAG.setRoot(Tmp.getValue(1)); + return 0; + } + + case Intrinsic::gcroot: + if (GFI) { + Value *Alloca = I.getOperand(1); + Constant *TypeMap = cast<Constant>(I.getOperand(2)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return 0; + + case Intrinsic::gcread: + case Intrinsic::gcwrite: + assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + return 0; + + case Intrinsic::flt_rounds: { + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + return 0; + } + + case Intrinsic::trap: { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + + case Intrinsic::uadd_with_overflow: + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); + + case Intrinsic::prefetch: { + SDValue Ops[4]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getOperand(1)); + Ops[2] = getValue(I.getOperand(2)); + Ops[3] = getValue(I.getOperand(3)); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + return 0; + } + + case Intrinsic::memory_barrier: { + SDValue Ops[6]; + Ops[0] = getRoot(); + for (int x = 1; x < 6; ++x) + Ops[x] = getValue(I.getOperand(x)); + + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); + return 0; + } + case Intrinsic::atomic_cmp_swap: { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + getValue(I.getOperand(3)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_load_add: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD); + case Intrinsic::atomic_load_sub: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_nand: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND); + case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case 
Intrinsic::atomic_load_umin: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + } +} + + +void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, + bool IsTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + unsigned BeginLabel = 0, EndLabel = 0; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); + } + + if (LandingPad && MMI) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getControlRoot(), BeginLabel)); + } + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), CS.getType(), + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), + CS.getCallingConv(), + IsTailCall && PerformTailCallOpt, + Callee, Args, DAG, getCurDebugLoc()); + if (CS.getType() != Type::VoidTy) + setValue(CS.getInstruction(), Result.first); + DAG.setRoot(Result.second); + + if (LandingPad && MMI) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + EndLabel = MMI->NextLabelID(); + DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(), + getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. + MMI->addInvoke(LandingPad, BeginLabel, EndLabel); + } +} + + +void SelectionDAGLowering::visitCall(CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + if (II) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. 
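// The name checks below map a handful of well-known libm calls straight to FP
// DAG nodes (copysign -> FCOPYSIGN, fabs -> FABS, sin -> FSIN, cos -> FCOS)
// when the operand count and types line up. A minimal standalone sketch of
// the same lookup, table-driven instead of length-plus-strcmp chains; the
// table and helper are illustrative, not part of this file:

#include <cstring>

struct LibmMapping { const char *Name; const char *Node; unsigned NumArgs; };

static const LibmMapping LibmTable[] = {
  {"copysign", "FCOPYSIGN", 2}, {"copysignf", "FCOPYSIGN", 2},
  {"fabs", "FABS", 1}, {"fabsf", "FABS", 1}, {"fabsl", "FABS", 1},
  {"sin",  "FSIN", 1}, {"sinf",  "FSIN", 1}, {"sinl",  "FSIN", 1},
  {"cos",  "FCOS", 1}, {"cosf",  "FCOS", 1}, {"cosl",  "FCOS", 1},
};

// Returns the node name to lower to, or null if the call is not recognized.
// NumArgs counts call arguments only (the callee operand is excluded).
static const char *matchLibmCall(const char *Name, unsigned NumArgs) {
  for (unsigned i = 0; i < sizeof(LibmTable)/sizeof(LibmTable[0]); ++i)
    if (LibmTable[i].NumArgs == NumArgs && !strcmp(LibmTable[i].Name, Name))
      return LibmTable[i].Node;
  return 0;
}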
+ unsigned NameLen = F->getNameLen(); + if (!F->hasLocalLinkage() && NameLen) { + const char *NameStr = F->getNameStart(); + if (NameStr[0] == 'c' && + ((NameLen == 8 && !strcmp(NameStr, "copysign")) || + (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (I.getNumOperands() == 3 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.getType() == I.getOperand(2)->getType()) { + SDValue LHS = getValue(I.getOperand(1)); + SDValue RHS = getValue(I.getOperand(2)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + } else if (NameStr[0] == 'f' && + ((NameLen == 4 && !strcmp(NameStr, "fabs")) || + (NameLen == 5 && !strcmp(NameStr, "fabsf")) || + (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 's' && + ((NameLen == 3 && !strcmp(NameStr, "sin")) || + (NameLen == 4 && !strcmp(NameStr, "sinf")) || + (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (NameStr[0] == 'c' && + ((NameLen == 3 && !strcmp(NameStr, "cos")) || + (NameLen == 4 && !strcmp(NameStr, "cosf")) || + (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } + } + } else if (isa<InlineAsm>(I.getOperand(0))) { + visitInlineAsm(&I); + return; + } + + SDValue Callee; + if (!RenameFn) + Callee = getValue(I.getOperand(0)); + else + Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + + LowerCallTo(&I, Callee, I.isTailCall()); +} + + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, + SDValue *Flag) const { + // Assemble the legal parts into the final values. + SmallVector<SDValue, 4> Values(ValueVTs.size()); + SmallVector<SDValue, 8> Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + MVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. 
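// The block below attaches AssertSext/AssertZext to values copied out of
// virtual registers when sign/zero-bit information is known. A standalone
// sketch of the width-selection rule it applies (pure function, illustrative
// only): pick the narrowest width the known bits prove the value was
// extended from, preferring sign extension.

#include <cassert>
#include <cstdio>

// Returns the asserted source width in bits (1, 8, 16 or 32) and sets IsSExt,
// or returns 0 when nothing can be asserted.
static unsigned pickAssertWidth(unsigned RegSize, unsigned NumSignBits,
                                unsigned NumZeroBits, bool &IsSExt) {
  if (NumSignBits == RegSize)      { IsSExt = true;  return 1;  }
  if (NumZeroBits >= RegSize - 1)  { IsSExt = false; return 1;  }
  if (NumSignBits > RegSize - 8)   { IsSExt = true;  return 8;  }
  if (NumZeroBits >= RegSize - 8)  { IsSExt = false; return 8;  }
  if (NumSignBits > RegSize - 16)  { IsSExt = true;  return 16; }
  if (NumZeroBits >= RegSize - 16) { IsSExt = false; return 16; }
  if (NumSignBits > RegSize - 32)  { IsSExt = true;  return 32; }
  if (NumZeroBits >= RegSize - 32) { IsSExt = false; return 32; }
  return 0;
}

int main() {
  bool S;
  // A 32-bit register known to have 25 sign bits: assert sext from i8.
  unsigned W = pickAssertWidth(32, 25, 0, S);
  assert(W == 8 && S);
  std::printf("AssertSext from i%u\n", W);
  return 0;
}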
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + if (FLI.LiveOutRegInfo.size() > SlotNo) { + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + MVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) { + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + + } + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + MVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(ValueVT); + MVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. + SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). 
+ // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, + bool HasMatching,unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); + assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); + unsigned Flag = Code | (Regs.size() << 3); + if (HasMatching) + Flag |= 0x80000000 | (MatchingIdx << 16); + Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]); + MVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} + +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + MVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + MVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} + + +namespace llvm { +/// AsmOperandInfo - This contains information for each constraint that we are +/// lowering. +class VISIBILITY_HIDDEN SDISelAsmOperandInfo : + public TargetLowering::AsmOperandInfo { +public: + /// CallOperand - If this is the result output operand or a clobber + /// this is null, otherwise it is the incoming operand to the CallInst. + /// This gets modified as the asm is processed. 
+ SDValue CallOperand; + + /// AssignedRegs - If this is a register or register class operand, this + /// contains the set of register corresponding to the operand. + RegsForValue AssignedRegs; + + explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { + } + + /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers + /// busy in OutputRegs/InputRegs. + void MarkAllocatedRegs(bool isOutReg, bool isInReg, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs, + const TargetRegisterInfo &TRI) const { + if (isOutReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); + } + if (isInReg) { + for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) + MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); + } + } + + /// getCallOperandValMVT - Return the MVT of the Value* that this operand + /// corresponds to. If there is no Value* for this operand, it returns + /// MVT::Other. + MVT getCallOperandValMVT(const TargetLowering &TLI, + const TargetData *TD) const { + if (CallOperandVal == 0) return MVT::Other; + + if (isa<BasicBlock>(CallOperandVal)) + return TLI.getPointerTy(); + + const llvm::Type *OpTy = CallOperandVal->getType(); + + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (isIndirect) + OpTy = cast<PointerType>(OpTy)->getElementType(); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(BitSize); + break; + } + } + + return TLI.getValueType(OpTy, true); + } + +private: + /// MarkRegAndAliases - Mark the specified register and all aliases in the + /// specified set. + static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); + Regs.insert(Reg); + if (const unsigned *Aliases = TRI.getAliasSet(Reg)) + for (; *Aliases; ++Aliases) + Regs.insert(*Aliases); + } +}; +} // end llvm namespace. + + +/// GetRegistersForValue - Assign registers (virtual or physical) for the +/// specified operand. We prefer to assign virtual registers, to allow the +/// register allocator handle the assignment process. However, if the asm uses +/// features that we can't model on machineinstrs, we have SDISel do the +/// allocation. This produces generally horrible, but correct, code. +/// +/// OpInfo describes the operand. +/// Input and OutputRegs are the set of already allocated physical registers. +/// +void SelectionDAGLowering:: +GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs) { + // Compute whether this value requires an input register, an output register, + // or both. + bool isOutReg = false; + bool isInReg = false; + switch (OpInfo.Type) { + case InlineAsm::isOutput: + isOutReg = true; + + // If there is an input constraint that matches this, we need to reserve + // the input register so no other inputs allocate to it. 
+ isInReg = OpInfo.hasMatchingInput(); + break; + case InlineAsm::isInput: + isInReg = true; + isOutReg = false; + break; + case InlineAsm::isClobber: + isOutReg = true; + isInReg = true; + break; + } + + + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector<unsigned, 4> Regs; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + std::pair<unsigned, const TargetRegisterClass*> PhysReg = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) { + // If this is a FP input in an integer register (or visa versa) insert a bit + // cast of the input value. More generally, handle any case where the input + // value disagrees with the register class we plan to stick this in. + if (OpInfo.Type == InlineAsm::isInput && + PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + // Try to convert to the first MVT that the reg class contains. If the + // types are identical size, use a bitcast to convert (e.g. two differing + // vector types). + MVT RegVT = *PhysReg.second->vt_begin(); + if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { + // If the input is a FP value and we want it in FP registers, do a + // bitcast to the corresponding integer type. This turns an f64 value + // into i64, which can be passed with two i32 values on a 32-bit + // machine. + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } + } + + NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); + } + + MVT RegVT; + MVT ValueVT = OpInfo.ConstraintVT; + + // If this is a constraint for a specific physical register, like {r17}, + // assign it now. + if (unsigned AssignedReg = PhysReg.first) { + const TargetRegisterClass *RC = PhysReg.second; + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = *RC->vt_begin(); + + // Get the actual register value type. This is important, because the user + // may have asked for (e.g.) the AX register in i32 type. We need to + // remember that AX is actually i16 to get the right extension. + RegVT = *RC->vt_begin(); + + // This is a explicit reference to a physical register. + Regs.push_back(AssignedReg); + + // If this is an expanded reference, add the rest of the regs to Regs. + if (NumRegs != 1) { + TargetRegisterClass::iterator I = RC->begin(); + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "Didn't find reg!"); + + // Already added the first reg. + --NumRegs; ++I; + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Regs.push_back(*I); + } + } + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + + // Otherwise, if this was a reference to an LLVM register class, create vregs + // for this reference. + if (const TargetRegisterClass *RC = PhysReg.second) { + RegVT = *RC->vt_begin(); + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; + + // Create the appropriate number of virtual registers. 
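// The bitcast applied earlier in this function ("turns an f64 value into i64,
// which can be passed with two i32 values on a 32-bit machine") only
// reinterprets bits; it never converts the number. A standalone host-side
// illustration of that reinterpretation and split (little-endian lo/hi naming
// is purely illustrative):

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  double d = 3.141592653589793;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);    // the "bitcast": same 64 bits, new type
  uint32_t lo = uint32_t(bits);           // first i32 part of the i64
  uint32_t hi = uint32_t(bits >> 32);     // second i32 part of the i64
  std::printf("f64 %.15g -> i64 0x%016llx -> i32 parts 0x%08x, 0x%08x\n",
              d, (unsigned long long)bits, hi, lo);
  return 0;
}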
+ MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + return; + } + + // This is a reference to a register class that doesn't directly correspond + // to an LLVM register class. Allocate NumRegs consecutive, available, + // registers from the class. + std::vector<unsigned> RegClassRegs + = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + unsigned NumAllocated = 0; + for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { + unsigned Reg = RegClassRegs[i]; + // See if this register is available. + if ((isOutReg && OutputRegs.count(Reg)) || // Already used. + (isInReg && InputRegs.count(Reg))) { // Already used. + // Make sure we find consecutive registers. + NumAllocated = 0; + continue; + } + + // Check to see if this register is allocatable (i.e. don't give out the + // stack pointer). + const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); + if (!RC) { // Couldn't allocate this register. + // Reset NumAllocated to make sure we return consecutive registers. + NumAllocated = 0; + continue; + } + + // Okay, this register is good, we can use it. + ++NumAllocated; + + // If we allocated enough consecutive registers, succeed. + if (NumAllocated == NumRegs) { + unsigned RegStart = (i-NumAllocated)+1; + unsigned RegEnd = i+1; + // Mark all of the allocated registers used. + for (unsigned i = RegStart; i != RegEnd; ++i) + Regs.push_back(RegClassRegs[i]); + + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.ConstraintVT); + OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); + return; + } + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +static bool +hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGLowering::visitInlineAsm(CallSite CS) { + InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + std::vector<SDISelAsmOperandInfo> ConstraintOperands; + + std::set<unsigned> OutputRegs, InputRegs; + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + std::vector<InlineAsm::ConstraintInfo> + ConstraintInfos = IA->ParseConstraints(); + + bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. 
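// A standalone sketch of the consecutive-register scan performed in
// GetRegistersForValue above for constraints that name a raw register list
// rather than a register class: walk the candidates in order, restart the run
// whenever a register is already used (or not allocatable), and succeed once
// NumRegs candidates in a row are free. Register numbers and the "used" set
// below are illustrative.

#include <set>
#include <vector>
#include <cstdio>

// Returns the starting index of a run of NumRegs consecutive free registers,
// or -1 if no such run exists.
static int findConsecutiveFree(const std::vector<unsigned> &Candidates,
                               const std::set<unsigned> &Used,
                               unsigned NumRegs) {
  unsigned NumAllocated = 0;
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Used.count(Candidates[i])) { NumAllocated = 0; continue; }
    if (++NumAllocated == NumRegs)
      return int(i - NumAllocated + 1);
  }
  return -1;
}

int main() {
  std::vector<unsigned> Regs;
  for (unsigned r = 10; r <= 14; ++r) Regs.push_back(r);
  std::set<unsigned> Used;
  Used.insert(11);                                        // reg 11 already taken
  int Start = findConsecutiveFree(Regs, Used, 2);
  std::printf("run of 2 starts at index %d\n", Start);    // prints 2 (regs 12,13)
  return 0;
}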
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + MVT OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpVT = TLI.getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpVT = TLI.getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // If this is an input or an indirect output, process the call argument. + // BasicBlocks are labels, currently appearing only in asm's. + if (OpInfo.CallOperandVal) { + if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + } + + OpVT = OpInfo.getCallOperandValMVT(TLI, TD); + } + + OpInfo.ConstraintVT = OpVT; + } + + // Second pass over the constraints: compute which constraint option to use + // and assign registers to constraints that want a specific physreg. + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + cerr << "llvm: error: Unsupported asm: input constraint with a " + << "matching output constraint of incompatible type!\n"; + exit(1); + } + Input.ConstraintVT = OpInfo.ConstraintVT; + } + } + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + + // If this is a memory input, and if the operand is not indirect, do what we + // need to to provide an address for the memory input. + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + !OpInfo.isIndirect) { + assert(OpInfo.Type == InlineAsm::isInput && + "Can only indirectify direct input operands!"); + + // Memory operands really want the address of the value. If we don't have + // an indirect input, put it in the constpool if we can, otherwise spill + // it to a stack slot. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. 
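// The matching-constraint check earlier in this pass only tolerates a type
// difference between a tied output and input when both sides are the same
// kind (integer vs floating point) and the same bit width; otherwise lowering
// reports an unsupported asm. A standalone sketch of that compatibility rule
// (TiedVT is an illustrative stand-in for the constraint value types):

struct TiedVT { bool IsInteger; unsigned SizeInBits; };

static bool tiedConstraintsCompatible(TiedVT Out, TiedVT In) {
  // Compatible tied operands may still differ in exact type (the input is
  // then rewritten to the output's type), but never in kind or width.
  return Out.IsInteger == In.IsInteger && Out.SizeInBits == In.SizeInBits;
}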
+ Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), + TLI.getPointerTy()); + } else { + // Otherwise, create a stack slot and emit a store to it before the + // asm. + const Type *Ty = OpVal->getType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Chain = DAG.getStore(Chain, getCurDebugLoc(), + OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand = StackSlot; + } + + // There is no longer a Value* corresponding to this operand. + OpInfo.CallOperandVal = 0; + // It is now an indirect operand. + OpInfo.isIndirect = true; + } + + // If this constraint is for a specific register, allocate it before + // anything else. + if (OpInfo.ConstraintType == TargetLowering::C_Register) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + ConstraintInfos.clear(); + + + // Second pass - Loop over all of the operands, assigning virtual or physregs + // to register class operands. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // C_Register operands have already been allocated, Other/Memory don't need + // to be. + if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + } + + // AsmNodeOperands - The operands for the ISD::INLINEASM node. + std::vector<SDValue> AsmNodeOperands; + AsmNodeOperands.push_back(SDValue()); // reserve space for input chain + AsmNodeOperands.push_back( + DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other)); + + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + // Add information to the INLINEASM node to know about this output. + unsigned ResOpType = 4/*MEM*/ | (1<<3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) { + cerr << "llvm: error: Couldn't allocate output reg for constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. 
+ assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + // Concatenate this output onto the outputs list. + RetValRegs.append(OpInfo.AssignedRegs); + } + + // Add information to the INLINEASM node to know that this register is + // set. + OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? + 6 /* EARLYCLOBBER REGDEF */ : + 2 /* REGDEF */ , + false, + 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isInput: { + SDValue InOperandVal = OpInfo.CallOperand; + + if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + // If this is required to match an output register we have already set, + // just use its register. + unsigned OperandNo = OpInfo.getMatchedOperand(); + + // Scan until we find the definition we already emitted of this operand. + // When we find it, create a RegsForValue operand. + unsigned CurOp = 2; // The first operand. + for (; OperandNo; --OperandNo) { + // Advance to the next operand. + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert(((OpFlag & 7) == 2 /*REGDEF*/ || + (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || + (OpFlag & 7) == 4 /*MEM*/) && + "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; + } + + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + if ((OpFlag & 7) == 2 /*REGDEF*/ + || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. + assert(!OpInfo.isIndirect && + "Don't know how to handle tied indirect register inputs yet!"); + RegsForValue MatchedRegs; + MatchedRegs.TLI = &TLI; + MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); + MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + MatchedRegs.RegVTs.push_back(RegVT); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); + i != e; ++i) + MatchedRegs.Regs. + push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); + + // Use the produced MatchedRegs object to + MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag); + MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + true, OpInfo.getMatchedOperand(), + DAG, AsmNodeOperands); + break; + } else { + assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); + assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; + } + } + + if (OpInfo.ConstraintType == TargetLowering::C_Other) { + assert(!OpInfo.isIndirect && + "Don't know how to handle indirect other inputs yet!"); + + std::vector<SDValue> Ops; + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + hasMemory, Ops, DAG); + if (Ops.empty()) { + cerr << "llvm: error: Invalid operand for inline asm constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + // Add information to the INLINEASM node to know about this input. 
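// The operand-flag words pushed onto the INLINEASM node, here and in
// AddInlineAsmOperands earlier in this file, pack the operand kind into the
// low 3 bits, the register/value count into bits 3..15, and, for operands
// tied to an earlier definition, bit 31 plus the matched operand number in
// bits 16 and up (hence the (OpFlag & 0xffff) >> 3 decode above). A
// standalone encode/decode sketch of that layout:

#include <cassert>
#include <cstdio>

static unsigned encodeAsmFlag(unsigned Kind, unsigned NumRegs,
                              bool HasMatch, unsigned MatchIdx) {
  assert(Kind < 8 && NumRegs < (1u << 13) && "fields out of range");
  unsigned Flag = Kind | (NumRegs << 3);
  if (HasMatch)
    Flag |= 0x80000000u | (MatchIdx << 16);
  return Flag;
}

int main() {
  unsigned Flag = encodeAsmFlag(2 /*REGDEF*/, 1, /*HasMatch=*/false, 0);
  unsigned Kind    = Flag & 7;              // 2: register definition
  unsigned NumRegs = (Flag & 0xffff) >> 3;  // 1 register
  std::printf("kind=%u numregs=%u tied=%s\n",
              Kind, NumRegs, (Flag & 0x80000000u) ? "yes" : "no");
  return 0;
}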
+ unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + break; + } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + assert(InOperandVal.getValueType() == TLI.getPointerTy() && + "Memory operands expect pointer values"); + + // Add information to the INLINEASM node to know about this input. + unsigned ResOpType = 4/*MEM*/ | (1<<3); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + TLI.getPointerTy())); + AsmNodeOperands.push_back(InOperandVal); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + assert(!OpInfo.isIndirect && + "Don't know how to handle indirect register inputs yet!"); + + // Copy the input into the appropriate registers. + if (OpInfo.AssignedRegs.Regs.empty()) { + cerr << "llvm: error: Couldn't allocate output reg for constraint '" + << OpInfo.ConstraintCode << "'!\n"; + exit(1); + } + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + Chain, &Flag); + + OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: { + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + false, 0, DAG,AsmNodeOperands); + break; + } + } + } + + // Finish up input operands. + AsmNodeOperands[0] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + DAG.getVTList(MVT::Other, MVT::Flag), + &AsmNodeOperands[0], AsmNodeOperands.size()); + Flag = Chain.getValue(1); + + // If this asm returns a register value, copy the result from that register + // and set it as the value of the call. + if (!RetValRegs.Regs.empty()) { + SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + Chain, &Flag); + + // FIXME: Why don't we do this for inline asms with MRVs? + if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { + MVT ResultType = TLI.getValueType(CS.getType()); + + // If any of the results of the inline asm is a vector, it may have the + // wrong width/num elts. This can happen for register classes that can + // contain multiple different value types. The preg or vreg allocated may + // not have the same VT as was expected. Convert it to the right type + // with bit_convert. + if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + ResultType, Val); + + } else if (ResultType != Val.getValueType() && + ResultType.isInteger() && Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result may + // have a wider width than the expected result. Extract the relevant + // portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + } + + assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + } + + setValue(CS.getInstruction(), Val); + // Don't need to use this as a chain in this case. 
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, Value*> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + Chain, &Flag); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) + OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + StoresToEmit[i].second, 0)); + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &OutChains[0], OutChains.size()); + DAG.setRoot(Chain); +} + + +void SelectionDAGLowering::visitMalloc(MallocInst &I) { + SDValue Src = getValue(I.getOperand(0)); + + // Scale up by the type size in the original i32 type width. Various + // mid-level optimizers may make assumptions about demanded bits etc from the + // i32-ness of the optimizer: we do not want to promote to i64 and then + // multiply on 64-bit targets. + // FIXME: Malloc inst should go away: PR715. + uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); + if (ElementSize != 1) + Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), + Src, DAG.getConstant(ElementSize, Src.getValueType())); + + MVT IntPtr = TLI.getPointerTy(); + + if (IntPtr.bitsLT(Src.getValueType())) + Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); + else if (IntPtr.bitsGT(Src.getValueType())) + Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Src; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("malloc", IntPtr), + Args, DAG, getCurDebugLoc()); + setValue(&I, Result.first); // Pointers always fit in registers + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitFree(FreeInst &I) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(I.getOperand(0)); + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Args.push_back(Entry); + MVT IntPtr = TLI.getPointerTy(); + std::pair<SDValue,SDValue> Result = + TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, + CallingConv::C, PerformTailCallOpt, + DAG.getExternalSymbol("free", IntPtr), Args, DAG, + getCurDebugLoc()); + DAG.setRoot(Result.second); +} + +void SelectionDAGLowering::visitVAStart(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVAArg(VAArgInst &I) { + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0))); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGLowering::visitVAEnd(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + MVT::Other, 
getRoot(), + getValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(1)))); +} + +void SelectionDAGLowering::visitVACopy(CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + MVT::Other, getRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + DAG.getSrcValue(I.getOperand(1)), + DAG.getSrcValue(I.getOperand(2)))); +} + +/// TargetLowering::LowerArguments - This is the default LowerArguments +/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all +/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be +/// integrated into SDISel. +void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &ArgValues, + DebugLoc dl) { + // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. + SmallVector<SDValue, 3+16> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); + Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); + + // Add one result value for each formal argument. + SmallVector<MVT, 16> RetVals; + unsigned j = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++j) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(j, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(j, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(j, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(j, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(j, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + if (F.getParamAlignment(j)) + FrameAlign = F.getParamAlignment(j); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(j, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) { + RetVals.push_back(RegisterVT); + ISD::ArgFlagsTy MyFlags = Flags; + if (NumRegs > 1 && i == 0) + MyFlags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.setOrigAlign(1); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + RetVals.push_back(MVT::Other); + + // Create the node. + SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, + DAG.getVTList(&RetVals[0], RetVals.size()), + &Ops[0], Ops.size()).getNode(); + + // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but + // allows exposing the loads that may be part of the argument access to the + // first DAGCombiner pass. + SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); + + // The number of results should match up, except that the lowered one may have + // an extra flag result. 
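// When a formal argument needs several registers, the loop above marks the
// first part as the start of a split value and forces the original alignment
// of every later part to 1, so only the first piece carries the real ABI
// alignment. A standalone sketch of that per-part rule (PartFlags is an
// illustrative stand-in for ISD::ArgFlagsTy):

#include <vector>
#include <cstdio>

struct PartFlags { bool Split; unsigned OrigAlign; };

static std::vector<PartFlags> flagsForParts(unsigned NumRegs, unsigned Align) {
  std::vector<PartFlags> Parts(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    Parts[i].Split = (NumRegs > 1 && i == 0);   // first piece of a split value
    Parts[i].OrigAlign = (i == 0) ? Align : 1;  // later pieces: alignment 1
  }
  return Parts;
}

int main() {
  // e.g. an i64 argument lowered to two i32 registers with 8-byte alignment.
  std::vector<PartFlags> P = flagsForParts(2, 8);
  for (unsigned i = 0; i != P.size(); ++i)
    std::printf("part %u: split=%d origAlign=%u\n", i, P[i].Split, P[i].OrigAlign);
  return 0;
}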
+ assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || + (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && + TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) + && "Lowering produced unexpected number of results!"); + + // The FORMAL_ARGUMENTS node itself is likely no longer needed. + if (Result != TmpRes.getNode() && Result->use_empty()) { + HandleSDNode Dummy(DAG.getRoot()); + DAG.RemoveDeadNode(Result); + } + + Result = TmpRes.getNode(); + + unsigned NumArgRegs = Result->getNumValues() - 1; + DAG.setRoot(SDValue(Result, NumArgRegs)); + + // Set up the return result vector. + unsigned i = 0; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, I->getType(), ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + MVT PartVT = getRegisterType(VT); + + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + for (unsigned j = 0; j != NumParts; ++j) + Parts[j] = SDValue(Result, i++); + + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, + PartVT, VT, AssertOp)); + } + } + assert(i == NumArgRegs && "Argument register count mismatch!"); +} + + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just inserts an ISD::CALL node, which is later custom +/// lowered by the target to something concrete. FIXME: When all targets are +/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, + bool RetSExt, bool RetZExt, bool isVarArg, + bool isInreg, + unsigned CallingConv, bool isTailCall, + SDValue Callee, + ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && + "isTailCall set when tail-call optimizations are disabled!"); + + SmallVector<SDValue, 32> Ops; + Ops.push_back(Chain); // Op#0 - Chain + Ops.push_back(Callee); + + // Handle all of the outgoing arguments. + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + MVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForMVT(); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + getTargetData()->getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(Args[i].Ty); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = getByValTypeAlignment(ElementTy); + unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
+ if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (Args[i].isNest) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterType(VT); + unsigned NumParts = getNumRegisters(VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); + + for (unsigned i = 0; i != NumParts; ++i) { + // if it isn't first piece, alignment must be 1 + ISD::ArgFlagsTy MyFlags = Flags; + if (NumParts > 1 && i == 0) + MyFlags.setSplit(); + else if (i != 0) + MyFlags.setOrigAlign(1); + + Ops.push_back(Parts[i]); + Ops.push_back(DAG.getArgFlags(MyFlags)); + } + } + } + + // Figure out the result value types. We start by making a list of + // the potentially illegal return value types. + SmallVector<MVT, 4> LoweredRetTys; + SmallVector<MVT, 4> RetTys; + ComputeValueVTs(*this, RetTy, RetTys); + + // Then we translate that to a list of legal types. + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + MVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + for (unsigned i = 0; i != NumRegs; ++i) + LoweredRetTys.push_back(RegisterVT); + } + + LoweredRetTys.push_back(MVT::Other); // Always has a chain. + + // Create the CALL node. + SDValue Res = DAG.getCall(CallingConv, dl, + isVarArg, isTailCall, isInreg, + DAG.getVTList(&LoweredRetTys[0], + LoweredRetTys.size()), + &Ops[0], Ops.size() + ); + Chain = Res.getValue(LoweredRetTys.size() - 1); + + // Gather up the call result into a single value. 
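// The code below walks the CALL node's result values and reassembles each
// (possibly illegal) return type from its legal register parts via
// getCopyFromParts. A standalone sketch of the simplest case: an i64 return
// rebuilt from two i32 parts on a 32-bit target (low-half-first part order is
// shown purely for illustration):

#include <cstdint>
#include <cstdio>

static uint64_t assembleI64(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  uint32_t Parts[2] = { 0x89abcdefu, 0x01234567u };   // two copied-out results
  std::printf("reassembled: 0x%016llx\n",
              (unsigned long long)assembleI64(Parts[0], Parts[1]));
  return 0;
}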
+ if (RetTy != Type::VoidTy && !RetTys.empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + + SmallVector<SDValue, 4> ReturnValues; + unsigned RegNo = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + MVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(VT); + unsigned NumRegs = getNumRegisters(VT); + unsigned RegNoEnd = NumRegs + RegNo; + SmallVector<SDValue, 4> Results; + for (; RegNo != RegNoEnd; ++RegNo) + Results.push_back(Res.getValue(RegNo)); + SDValue ReturnValue = + getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT, + AssertOp); + ReturnValues.push_back(ReturnValue); + } + Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size()); + } + + return std::make_pair(Res, Chain); +} + +void TargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + SDValue Res = LowerOperation(SDValue(N, 0), DAG); + if (Res.getNode()) + Results.push_back(Res); +} + +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + assert(0 && "LowerOperation not implemented for this target!"); + abort(); + return SDValue(); +} + + +void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { + SDValue Op = getValue(V); + assert((Op.getOpcode() != ISD::CopyFromReg || + cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && + "Copy from a reg to the same reg!"); + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + + RegsForValue RFV(TLI, Reg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + PendingExports.push_back(Chain); +} + +#include "llvm/CodeGen/SelectionDAGISel.h" + +void SelectionDAGISel:: +LowerArguments(BasicBlock *LLVMBB) { + // If this is the entry block, emit arguments. + Function &F = *LLVMBB->getParent(); + SDValue OldRoot = SDL->DAG.getRoot(); + SmallVector<SDValue, 16> Args; + TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc()); + + unsigned a = 0; + for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); + AI != E; ++AI) { + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, AI->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (!AI->use_empty()) { + SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues, + SDL->getCurDebugLoc())); + // If this argument is live outside of the entry block, insert a copy from + // whereever we got it to the vreg that other BB's will reference it as. + SDL->CopyToExportRegsIfNeeded(AI); + } + a += NumValues; + } + + // Finally, if the target has anything special to do, allow it to do so. + // FIXME: this should insert code into the DAG! + EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction()); +} + +/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to +/// ensure constants are generated when needed. Remember the virtual registers +/// that need to be added to the Machine PHI nodes as input. We cannot just +/// directly add them, because expansion might result in multiple MBB's for one +/// BB. As such, the start of the BB might correspond to a different MBB than +/// the end. 
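// In the function below, each PHI input value contributes one entry per legal
// register it expands to: a machine PHI node paired with consecutive virtual
// registers Reg, Reg+1, ... A standalone sketch of that bookkeeping (PHIUpdate
// and the per-value register counts are illustrative):

#include <utility>
#include <vector>
#include <cstdio>

typedef std::pair<unsigned, unsigned> PHIUpdate;   // (machine PHI id, vreg)

static void recordPHIInputs(std::vector<PHIUpdate> &Updates,
                            unsigned FirstPHI, unsigned Reg,
                            const std::vector<unsigned> &RegsPerValue) {
  unsigned PHI = FirstPHI;
  for (unsigned v = 0; v != RegsPerValue.size(); ++v) {
    for (unsigned i = 0; i != RegsPerValue[v]; ++i)
      Updates.push_back(std::make_pair(PHI++, Reg + i));   // one machine PHI per register
    Reg += RegsPerValue[v];
  }
}

int main() {
  std::vector<PHIUpdate> Updates;
  std::vector<unsigned> RegsPerValue;
  RegsPerValue.push_back(2);            // e.g. an i64 value on a 32-bit target
  recordPHIInputs(Updates, /*FirstPHI=*/0, /*Reg=*/100, RegsPerValue);
  for (unsigned i = 0; i != Updates.size(); ++i)
    std::printf("phi %u <- vreg %u\n", Updates[i].first, Updates[i].second);
  return 0;
}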
+/// +void +SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { + TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + PHINode *PN; + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::iterator I = SuccBB->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { + // Ignore dead phi's. + if (PN->use_empty()) continue; + + unsigned Reg; + Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + if (Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = SDL->ConstantsOut[C]; + if (RegOut == 0) { + RegOut = FuncInfo->CreateRegForValue(C); + SDL->CopyValueToVirtualRegister(C, RegOut); + } + Reg = RegOut; + } else { + Reg = FuncInfo->ValueMap[PHIOp]; + if (Reg == 0) { + assert(isa<AllocaInst>(PHIOp) && + FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + "Didn't codegen value into a register!??"); + Reg = FuncInfo->CreateRegForValue(PHIOp); + SDL->CopyValueToVirtualRegister(PHIOp, Reg); + } + } + + // Remember that this register needs to added to the machine PHI node as + // the input for this MBB. + SmallVector<MVT, 4> ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + MVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(VT); + for (unsigned i = 0, e = NumRegisters; i != e; ++i) + SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + Reg += NumRegisters; + } + } + } + SDL->ConstantsOut.clear(); +} + +/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only +/// supports legal types, and it emits MachineInstrs directly instead of +/// creating SelectionDAG nodes. +/// +bool +SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, + FastISel *F) { + TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + PHINode *PN; + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::iterator I = SuccBB->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) { + // Ignore dead phi's. 
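One detail of the DAG-based routine above that is easy to miss: constants feeding successor PHIs are funneled through the ConstantsOut map so that each constant is copied into a virtual register at most once per block, no matter how many PHI operands refer to it, and the map is cleared once the block is finished. A tiny sketch of that memoization pattern, with a string standing in for Constant and a counter standing in for the register allocator (all names here are invented for the sketch):

#include <cstdio>
#include <map>
#include <string>

static unsigned NextVReg = 1024;
static unsigned CreateRegForValue(const std::string &) { return NextVReg++; }

int main() {
  // Plays the role of SDL->ConstantsOut for a single basic block.
  std::map<std::string, unsigned> ConstantsOut;
  const char *PHIOperands[] = { "i32 7", "i32 7", "i32 42", "i32 7" };

  for (unsigned i = 0; i != 4; ++i) {
    unsigned &RegOut = ConstantsOut[PHIOperands[i]];   // 0 on first lookup
    if (RegOut == 0)                                   // first use in this block:
      RegOut = CreateRegForValue(PHIOperands[i]);      //   emit the copy once
    std::printf("%-7s -> vreg%u\n", PHIOperands[i], RegOut);
  }
  ConstantsOut.clear();                                // done with the block
  return 0;
}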
+ if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary becuase FastISel doesn't + // use CreateRegForValue to create registers, so it always creates + // exactly one register for each non-void instruction. + MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Promote MVT::i1. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(VT); + else { + SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + unsigned Reg = F->getRegForValue(PHIOp); + if (Reg == 0) { + SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + } + } + + return true; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h new file mode 100644 index 000000000000..578aa591ce67 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -0,0 +1,558 @@ +//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAGBUILD_H +#define SELECTIONDAGBUILD_H + +#include "llvm/Constants.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#ifndef NDEBUG +#include "llvm/ADT/SmallSet.h" +#endif +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Target/TargetMachine.h" +#include <vector> +#include <set> + +namespace llvm { + +class AliasAnalysis; +class AllocaInst; +class BasicBlock; +class BitCastInst; +class BranchInst; +class CallInst; +class ExtractElementInst; +class ExtractValueInst; +class FCmpInst; +class FPExtInst; +class FPToSIInst; +class FPToUIInst; +class FPTruncInst; +class FreeInst; +class Function; +class GetElementPtrInst; +class GCFunctionInfo; +class ICmpInst; +class IntToPtrInst; +class InvokeInst; +class InsertElementInst; +class InsertValueInst; +class Instruction; +class LoadInst; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineModuleInfo; +class MachineRegisterInfo; +class MallocInst; +class PHINode; +class PtrToIntInst; +class ReturnInst; +class SDISelAsmOperandInfo; +class SExtInst; +class SelectInst; +class ShuffleVectorInst; +class SIToFPInst; +class StoreInst; +class SwitchInst; +class TargetData; +class TargetLowering; +class TruncInst; +class UIToFPInst; +class UnreachableInst; +class UnwindInst; +class VICmpInst; +class VFCmpInst; +class VAArgInst; +class ZExtInst; + +//===--------------------------------------------------------------------===// +/// FunctionLoweringInfo - This contains information that is global to a +/// function that is used when lowering a region of the function. 
+/// +class FunctionLoweringInfo { +public: + TargetLowering &TLI; + Function *Fn; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + + explicit FunctionLoweringInfo(TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG, + bool EnableFastISel); + + /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. + DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; + + /// ValueMap - Since we emit code for the function a basic block at a time, + /// we must remember which virtual registers hold the values for + /// cross-basic-block values. + DenseMap<const Value*, unsigned> ValueMap; + + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in + /// the entry block. This allows the allocas to be efficiently referenced + /// anywhere in the function. + DenseMap<const AllocaInst*, int> StaticAllocaMap; + +#ifndef NDEBUG + SmallSet<Instruction*, 8> CatchInfoLost; + SmallSet<Instruction*, 8> CatchInfoFound; +#endif + + unsigned MakeReg(MVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } + + struct LiveOutInfo { + unsigned NumSignBits; + APInt KnownOne, KnownZero; + LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} + }; + + /// LiveOutRegInfo - Information about live out vregs, indexed by their + /// register number offset by 'FirstVirtualRegister'. + std::vector<LiveOutInfo> LiveOutRegInfo; + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear() { + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); + } +}; + +//===----------------------------------------------------------------------===// +/// SelectionDAGLowering - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// Also, targets can overload any lowering method. +/// +class SelectionDAGLowering { + MachineBasicBlock *CurMBB; + + /// CurDebugLoc - current file + line number. Changes as we build the DAG. + DebugLoc CurDebugLoc; + + DenseMap<const Value*, SDValue> NodeMap; + + /// PendingLoads - Loads are not emitted to the program immediately. We bunch + /// them up and then emit token factor nodes when possible. This allows us to + /// get simple disambiguation between loads without worrying about alias + /// analysis. + SmallVector<SDValue, 8> PendingLoads; + + /// PendingExports - CopyToReg nodes that copy values to virtual registers + /// for export to other blocks need to be emitted before any terminator + /// instruction, but they have no other ordering requirements. We bunch them + /// up and the emit a single tokenfactor for them just before terminator + /// instructions. + SmallVector<SDValue, 8> PendingExports; + + /// Case - A struct to record the Value for a switch case, and the + /// case's target basic block. 
+ struct Case { + Constant* Low; + Constant* High; + MachineBasicBlock* BB; + + Case() : Low(0), High(0), BB(0) { } + Case(Constant* low, Constant* high, MachineBasicBlock* bb) : + Low(low), High(high), BB(bb) { } + uint64_t size() const { + uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue(); + uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue(); + return (rHigh - rLow + 1ULL); + } + }; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): + Mask(mask), BB(bb), Bits(bits) { } + }; + + typedef std::vector<Case> CaseVector; + typedef std::vector<CaseBits> CaseBitsVector; + typedef CaseVector::iterator CaseItr; + typedef std::pair<CaseItr, CaseItr> CaseRange; + + /// CaseRec - A struct with ctor used in lowering switches to a binary tree + /// of conditional branches. + struct CaseRec { + CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseBB(bb), LT(lt), GE(ge), Range(r) {} + + /// CaseBB - The MBB in which to emit the compare and branch + MachineBasicBlock *CaseBB; + /// LT, GE - If nonzero, we know the current case value must be less-than or + /// greater-than-or-equal-to these Constants. + Constant *LT; + Constant *GE; + /// Range - A pair of iterators representing the range of case values to be + /// processed at this point in the binary search tree. + CaseRange Range; + }; + + typedef std::vector<CaseRec> CaseRecVector; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator () (const Case& C1, const Case& C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + + struct CaseBitsCmp { + bool operator () (const CaseBits& C1, const CaseBits& C2) { + return C1.Bits > C2.Bits; + } + }; + + size_t Clusterify(CaseVector& Cases, const SwitchInst &SI); + + /// CaseBlock - This structure is used to communicate between SDLowering and + /// SDISel for the code generation of additional basic blocks needed by multi- + /// case switch statements. + struct CaseBlock { + CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + MachineBasicBlock *truebb, MachineBasicBlock *falsebb, + MachineBasicBlock *me) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {} + // CC - the condition code to use for the case block's setcc node + ISD::CondCode CC; + // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. + // Emit by default LHS op RHS. MHS is used for range comparisons: + // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). + Value *CmpLHS, *CmpMHS, *CmpRHS; + // TrueBB/FalseBB - the block to branch to if the setcc is true/false. + MachineBasicBlock *TrueBB, *FalseBB; + // ThisBB - the block into which to emit the code for the setcc and branches + MachineBasicBlock *ThisBB; + }; + struct JumpTable { + JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, + MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} + + /// Reg - the virtual register containing the index of the jump table entry + //. to jump to. + unsigned Reg; + /// JTI - the JumpTableIndex for this jump table in the function. + unsigned JTI; + /// MBB - the MBB into which to emit the code for the indirect jump. 
+ MachineBasicBlock *MBB; + /// Default - the MBB of the default bb, which is a successor of the range + /// check MBB. This is when updating PHI nodes in successors. + MachineBasicBlock *Default; + }; + struct JumpTableHeader { + JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H, + bool E = false): + First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} + APInt First; + APInt Last; + Value *SValue; + MachineBasicBlock *HeaderBB; + bool Emitted; + }; + typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; + + struct BitTestCase { + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): + Mask(M), ThisBB(T), TargetBB(Tr) { } + uint64_t Mask; + MachineBasicBlock* ThisBB; + MachineBasicBlock* TargetBB; + }; + + typedef SmallVector<BitTestCase, 3> BitTestInfo; + + struct BitTestBlock { + BitTestBlock(APInt F, APInt R, Value* SV, + unsigned Rg, bool E, + MachineBasicBlock* P, MachineBasicBlock* D, + const BitTestInfo& C): + First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + Parent(P), Default(D), Cases(C) { } + APInt First; + APInt Range; + Value *SValue; + unsigned Reg; + bool Emitted; + MachineBasicBlock *Parent; + MachineBasicBlock *Default; + BitTestInfo Cases; + }; + +public: + // TLI - This is information that describes the available target features we + // need for lowering. This indicates when operations are unavailable, + // implemented with a libcall, etc. + TargetLowering &TLI; + SelectionDAG &DAG; + const TargetData *TD; + AliasAnalysis *AA; + + /// SwitchCases - Vector of CaseBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<CaseBlock> SwitchCases; + /// JTCases - Vector of JumpTable structures used to communicate + /// SwitchInst code generation information. + std::vector<JumpTableBlock> JTCases; + /// BitTestCases - Vector of BitTestBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<BitTestBlock> BitTestCases; + + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + // Emit PHI-node-operand constants only once even if used by multiple + // PHI nodes. + DenseMap<Constant*, unsigned> ConstantsOut; + + /// FuncInfo - Information about the function as a whole. + /// + FunctionLoweringInfo &FuncInfo; + + /// OptLevel - What optimization level we're generating code for. + /// + CodeGenOpt::Level OptLevel; + + /// GFI - Garbage collection metadata for the function. + GCFunctionInfo *GFI; + + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, + FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) + : CurDebugLoc(DebugLoc::getUnknownLoc()), + TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) { + } + + void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + + /// clear - Clear out the curret SelectionDAG and the associated + /// state and prepare this SelectionDAGLowering object to be used + /// for a new block. This doesn't clear out information about + /// additional blocks that are needed to complete switch lowering + /// or PHI node updating; that information is cleared out as it is + /// consumed. + void clear(); + + /// getRoot - Return the current virtual root of the Selection DAG, + /// flushing any PendingLoad items. This must be done before emitting + /// a store or any other node that may need to be ordered after any + /// prior load instructions. + /// + SDValue getRoot(); + + /// getControlRoot - Similar to getRoot, but instead of flushing all the + /// PendingLoad items, flush all the PendingExports items. 
It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } + + void CopyValueToVirtualRegister(Value *V, unsigned Reg); + + void visit(Instruction &I); + + void visit(unsigned Opcode, User &I); + + void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + + SDValue getValue(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs); + + void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + unsigned Opc); + void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB); + bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(Value *V); + void ExportFromCurrentBlock(Value *V); + void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + MachineBasicBlock *LandingPad = NULL); + +private: + // Terminator instructions. + void visitRet(ReturnInst &I); + void visitBr(BranchInst &I); + void visitSwitch(SwitchInst &I); + void visitUnreachable(UnreachableInst &I) { /* noop */ } + + // Helpers for visitSwitch + bool handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); + bool handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default); +public: + void visitSwitchCase(CaseBlock &CB); + void visitBitTestHeader(BitTestBlock &B); + void visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + +private: + // These all get lowered before this pass. 
+ void visitInvoke(InvokeInst &I); + void visitUnwind(UnwindInst &I); + + void visitBinary(User &I, unsigned OpCode); + void visitShift(User &I, unsigned Opcode); + void visitAdd(User &I); + void visitSub(User &I); + void visitMul(User &I); + void visitURem(User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (User &I) { visitBinary(I, ISD::AND); } + void visitOr (User &I) { visitBinary(I, ISD::OR); } + void visitXor (User &I) { visitBinary(I, ISD::XOR); } + void visitShl (User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); + void visitVICmp(User &I); + void visitVFCmp(User &I); + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); + + void visitExtractElement(User &I); + void visitInsertElement(User &I); + void visitShuffleVector(User &I); + + void visitExtractValue(ExtractValueInst &I); + void visitInsertValue(InsertValueInst &I); + + void visitGetElementPtr(User &I); + void visitSelect(User &I); + + void visitMalloc(MallocInst &I); + void visitFree(FreeInst &I); + void visitAlloca(AllocaInst &I); + void visitLoad(LoadInst &I); + void visitStore(StoreInst &I); + void visitPHI(PHINode &I) { } // PHI nodes are handled specially. + void visitCall(CallInst &I); + void visitInlineAsm(CallSite CS); + const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); + + void visitPow(CallInst &I); + void visitExp2(CallInst &I); + void visitExp(CallInst &I); + void visitLog(CallInst &I); + void visitLog2(CallInst &I); + void visitLog10(CallInst &I); + + void visitVAStart(CallInst &I); + void visitVAArg(VAArgInst &I); + void visitVAEnd(CallInst &I); + void visitVACopy(CallInst &I); + + void visitUserOp1(Instruction &I) { + assert(0 && "UserOp1 should not exist at instruction selection time!"); + abort(); + } + void visitUserOp2(Instruction &I) { + assert(0 && "UserOp2 should not exist at instruction selection time!"); + abort(); + } + + const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB); + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 000000000000..9d72a128d18b --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,1347 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "isel" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuild.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Constants.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> +DisableLegalizeTypes("disable-legalize-types", cl::Hidden); +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<bool> +EnableFastISelAbort("fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +#ifndef NDEBUG +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + 
ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + + if (OptLevel == CodeGenOpt::None) + return createFastDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) + return createTDListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == + TargetLowering::SchedulingForRegPressure && "Unknown sched type!"); + return createBURRListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomDAGSchedInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and the scheduler passes ownership of it to this method. +MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { + cerr << "If a target marks an instruction with " + << "'usesCustomDAGSchedInserter', it must implement " + << "TargetLowering::EmitInstrWithCustomInserter!\n"; + abort(); + return 0; +} + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap<MachineInstr*, unsigned> &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. 
+ bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. + MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - If this is the first basic block in the function, +/// and if it has live ins that need to be copied into vregs, emit the +/// copies into the block. +static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, MRI, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), + E = MRI.livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = MRI.getRegClass(LI->second); + TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + } + } +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + FuncInfo(new FunctionLoweringInfo(TLI)), + CurDAG(new SelectionDAG(TLI, *FuncInfo)), + SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) +{} + +SelectionDAGISel::~SelectionDAGISel() { + delete SDL; + delete CurDAG; + delete FuncInfo; +} + +unsigned SelectionDAGISel::MakeReg(MVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<GCModuleInfo>(); + AU.addRequired<DwarfWriter>(); + AU.setPreservesAll(); +} + +bool SelectionDAGISel::runOnFunction(Function &Fn) { + // Do some sanity-checking on the command-line options. 
+ assert((!EnableFastISelVerbose || EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || EnableFastISel) && + "-fast-isel-abort requires -fast-isel"); + + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (Fn.hasAvailableExternallyLinkage()) + return false; + + + // Get alias analysis for load/store combining. + AA = &getAnalysis<AliasAnalysis>(); + + TargetMachine &TM = TLI.getTargetMachine(); + MF = &MachineFunction::construct(&Fn, TM); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + if (MF->getFunction()->hasGC()) + GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction()); + else + GFI = 0; + RegInfo = &MF->getRegInfo(); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); + CurDAG->init(*MF, MMI, DW); + FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel); + SDL->init(GFI, *AA); + + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) + // Mark landing pad. + FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + SelectAllBasicBlocks(Fn, *MF, MMI, DW, TII); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), + E = RegInfo->livein_end(); I != E; ++I) + MF->begin()->addLiveIn(I->first); + +#ifndef NDEBUG + assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); +#endif + + FuncInfo->clear(); + + return true; +} + +static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) + if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} + +/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and +/// whether object offset >= 0. +static bool +IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { + if (!isa<FrameIndexSDNode>(Op)) return false; + + FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op); + int FrameIdx = FrameIdxNode->getIndex(); + return MFI->isFixedObjectIndex(FrameIdx) && + MFI->getObjectOffset(FrameIdx) >= 0; +} + +/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could +/// possibly be overwritten when lowering the outgoing arguments in a tail +/// call. Currently the implementation of this call is very conservative and +/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with +/// virtual registers would be overwritten by direct lowering. 
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, + MachineFrameInfo *MFI) { + RegisterSDNode * OpReg = NULL; + if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || + (Op.getOpcode()== ISD::CopyFromReg && + (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) && + (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || + (Op.getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || + (Op.getOpcode() == ISD::MERGE_VALUES && + Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && + IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). + getOperand(1)))) + return true; + return false; +} + +/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the +/// DAG and fixes their tailcall attribute operand. +static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, + const TargetLowering& TLI) { + SDNode * Ret = NULL; + SDValue Terminator = DAG.getRoot(); + + // Find RET node. + if (Terminator.getOpcode() == ISD::RET) { + Ret = Terminator.getNode(); + } + + // Fix tail call attribute of CALL nodes. + for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), + BI = DAG.allnodes_end(); BI != BE; ) { + --BI; + if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) { + SDValue OpRet(Ret, 0); + SDValue OpCall(BI, 0); + bool isMarkedTailCall = TheCall->isTailCall(); + // If CALL node has tail call attribute set to true and the call is not + // eligible (no RET or the target rejects) the attribute is fixed to + // false. The TargetLowering::IsEligibleForTailCallOptimization function + // must correctly identify tail call optimizable calls. + if (!isMarkedTailCall) continue; + if (Ret==NULL || + !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { + // Not eligible. Mark CALL node as non tail call. Note that we + // can modify the call node in place since calls are not CSE'd. + TheCall->setNotTailCall(); + } else { + // Look for tail call clobbered arguments. Emit a series of + // copyto/copyfrom virtual register nodes to protect them. + SmallVector<SDValue, 32> Ops; + SDValue Chain = TheCall->getChain(), InFlag; + Ops.push_back(Chain); + Ops.push_back(TheCall->getCallee()); + for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { + SDValue Arg = TheCall->getArg(i); + bool isByVal = TheCall->getArgFlags(i).isByVal(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (!isByVal && + IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { + MVT VT = Arg.getValueType(); + unsigned VReg = MF.getRegInfo(). + createVirtualRegister(TLI.getRegClassFor(VT)); + Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), + VReg, Arg, InFlag); + InFlag = Chain.getValue(1); + Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), + VReg, VT, InFlag); + Chain = Arg.getValue(1); + InFlag = Arg.getValue(2); + } + Ops.push_back(Arg); + Ops.push_back(TheCall->getArgFlagsVal(i)); + } + // Link in chain of CopyTo/CopyFromReg. + Ops[0] = Chain; + DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); + } + } + } +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, + BasicBlock::iterator Begin, + BasicBlock::iterator End) { + SDL->setCurrentBasicBlock(BB); + + // Lower all of the non-terminator instructions. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<TerminatorInst>(I)) + SDL->visit(*I); + + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. 
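The export step referred to in the comment above exists because SelectionDAG construction is per-block: any value with a user in another basic block must be copied into a virtual register before this block's terminator is emitted, so the other block can reach it through the ValueMap. A rough sketch of the underlying test, using a toy instruction type rather than the real IR classes (Inst, Block, and Users are invented here):

#include <cstdio>
#include <vector>

// Toy stand-ins for the IR: an instruction knows which block defines it and
// which instructions use it.
struct Inst {
  int Block;
  std::vector<Inst*> Users;
};

// Needs an export (a copy into a vreg) if any user lives in another block.
static bool usedOutsideDefiningBlock(const Inst &I) {
  for (unsigned i = 0; i != I.Users.size(); ++i)
    if (I.Users[i]->Block != I.Block)
      return true;
  return false;
}

int main() {
  Inst Use1, Use2, Def;
  Use1.Block = 0;                      // user in the defining block
  Use2.Block = 1;                      // user in a different block
  Def.Block  = 0;
  Def.Users.push_back(&Use1);
  Def.Users.push_back(&Use2);
  std::printf("needs export: %s\n",
              usedOutsideDefiningBlock(Def) ? "yes" : "no");
  return 0;
}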
+ for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) + SDL->CopyToExportRegsIfNeeded(I); + + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); + + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } + + // Make sure the root of the DAG is up-to-date. + CurDAG->setRoot(SDL->getControlRoot()); + + // Check whether calls in this block are real tail calls. Fix up CALL nodes + // with correct tailcall attribute so that the target can rely on the tailcall + // attribute indicating whether the call is really eligible for tail call + // optimization. + if (PerformTailCallOpt) + CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); + SDL->clear(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N)) + continue; + + // Otherwise, add all chain operands to the worklist. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + Worklist.push_back(N->getOperand(i).getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + MVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); + CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + + // Only install this information if it tells us something. + if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { + DestReg -= TargetRegisterInfo::FirstVirtualRegister; + FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); + if (DestReg >= FLI.LiveOutRegInfo.size()) + FLI.LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + } +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) + BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + + BB->getBasicBlock()->getName(); + + DOUT << "Initial selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. 
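Each phase that follows is wrapped in the same if/else shape: when -time-passes is enabled, a NamedRegionTimer is constructed so that its scope brackets exactly one phase, and the identical call is repeated in the else branch so the timer object (and its overhead) does not exist at all otherwise. A minimal sketch of that RAII idea with a home-made timer (ScopedTimer is invented for the sketch; it is not the real NamedRegionTimer):

#include <cstdio>
#include <ctime>

// Invented RAII timer: starts in the constructor, reports in the destructor.
struct ScopedTimer {
  const char *Name;
  std::clock_t Start;
  explicit ScopedTimer(const char *N) : Name(N), Start(std::clock()) {}
  ~ScopedTimer() {
    double Sec = double(std::clock() - Start) / CLOCKS_PER_SEC;
    std::printf("%s: %.3f s\n", Name, Sec);
  }
};

static void runPhase() {
  // Stand-in for a pipeline phase such as CurDAG->Combine(...).
  volatile unsigned Sink = 0;
  for (unsigned i = 0; i != 50000000u; ++i) Sink += i;
}

int main(int argc, char **) {
  bool TimePassesIsEnabled = (argc > 1);   // pretend -time-passes was given
  if (TimePassesIsEnabled) {
    ScopedTimer T("DAG Combining 1");      // timer scope ends after the phase
    runPhase();
  } else {
    runPhase();                            // same call, no timer object
  }
  return 0;
}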
+ if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 1", GroupName); + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } else { + CurDAG->Combine(Unrestricted, *AA, OptLevel); + } + + DOUT << "Optimized lowered selection DAG:\n"; + DEBUG(CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (!DisableLegalizeTypes) { + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DOUT << "Type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + } + + DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + } + } + + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Legalization", GroupName); + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } else { + CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + } + + DOUT << "Legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining 2", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } + + DOUT << "Optimized legalized selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Selection", GroupName); + InstructionSelect(); + } else { + InstructionSelect(); + } + + DOUT << "Selected selection DAG:\n"; + DEBUG(CurDAG->dump()); + + if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. 
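At this point the per-block pipeline implemented by CodeGenAndEmitDAG has run combining, type legalization, vector legalization, operation legalization, further combines, and instruction selection; what remains is scheduling the selected nodes and emitting MachineInstrs. A compressed outline of the whole sequence, with the calls reduced to invented placeholder functions so only the ordering is shown (type and vector legalization may be skipped or repeated depending on what actually changed):

#include <cstdio>

// Placeholder phases; each just announces itself. The real calls are made on
// CurDAG and the scheduler, as in the surrounding function.
static void phase(const char *Name) { std::printf("%s\n", Name); }

static void codeGenAndEmitDAGOutline() {
  phase("combine (pre-legalize)");
  phase("legalize types");
  phase("combine (post-type-legalize)");
  phase("legalize vectors, then re-legalize types");
  phase("combine (post-vector-legalize)");
  phase("legalize operations");
  phase("combine (post-legalize)");
  phase("select instructions");
  phase("schedule");
  phase("emit machine code");
}

int main() { codeGenAndEmitDAGOutline(); return 0; }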
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling", GroupName); + Scheduler->Run(CurDAG, BB, BB->end()); + } else { + Scheduler->Run(CurDAG, BB, BB->end()); + } + + if (ViewSUnitDAGs) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Creation", GroupName); + BB = Scheduler->EmitSchedule(); + } else { + BB = Scheduler->EmitSchedule(); + } + + // Free the scheduler state. + if (TimePassesIsEnabled) { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); + delete Scheduler; + } else { + delete Scheduler; + } + + DOUT << "Selected machine code:\n"; + DEBUG(BB->dump()); +} + +void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, + MachineFunction &MF, + MachineModuleInfo *MMI, + DwarfWriter *DW, + const TargetInstrInfo &TII) { + // Initialize the Fast-ISel state, if needed. + FastISel *FastIS = 0; + if (EnableFastISel) + FastIS = TLI.createFastISel(MF, MMI, DW, + FuncInfo->ValueMap, + FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap +#ifndef NDEBUG + , FuncInfo->CatchInfoLost +#endif + ); + + // Iterate over all basic blocks in the function. + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + BasicBlock *LLVMBB = &*I; + BB = FuncInfo->MBBMap[LLVMBB]; + + BasicBlock::iterator const Begin = LLVMBB->begin(); + BasicBlock::iterator const End = LLVMBB->end(); + BasicBlock::iterator BI = Begin; + + // Lower any arguments needed in this block if this is the entry block. + bool SuppressFastISel = false; + if (LLVMBB == &Fn.getEntryBlock()) { + LowerArguments(LLVMBB); + + // If any of the arguments has the byval attribute, forgo + // fast-isel in the entry block. + if (FastIS) { + unsigned j = 1; + for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); + I != E; ++I, ++j) + if (Fn.paramHasAttr(j, Attribute::ByVal)) { + if (EnableFastISelVerbose || EnableFastISelAbort) + cerr << "FastISel skips entry block due to byval argument\n"; + SuppressFastISel = true; + break; + } + } + } + + if (MMI && BB->isLandingPad()) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + unsigned LabelID = MMI->addLandingPad(BB); + + const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results in exceptions + // not being caught because no typeids are associated with the invoke. 
+ // This may not be the only way things can go wrong, but it is the only way + // we try to work around for the moment. + BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; + + if (I == E) + // No catch info found - try to extract some from the successor. + copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + } + } + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS && !SuppressFastISel) { + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + CurDAG->setRoot(SDL->getControlRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. + for (; BI != End; ++BI) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(BI)) + if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + assert(0 && "FastISel didn't handle a PHI in a successor"); + break; + } + + // First try normal tablegen-generated "fast" selection. + if (FastIS->SelectInstruction(BI)) + continue; + + // Next, try calling the target to attempt to handle the instruction. + if (FastIS->TargetSelectInstruction(BI)) + continue; + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel missed call: "; + BI->dump(); + } + + if (BI->getType() != Type::VoidTy) { + unsigned &R = FuncInfo->ValueMap[BI]; + if (!R) + R = FuncInfo->CreateRegForValue(BI); + } + + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, next(BI)); + // If the instruction was codegen'd with multiple blocks, + // inform the FastISel object where to resume inserting. + FastIS->setCurrentBlock(BB); + continue; + } + + // Otherwise, give up on FastISel for the rest of the block. + // For now, be a little lenient about non-branch terminators. + if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + if (EnableFastISelVerbose || EnableFastISelAbort) { + cerr << "FastISel miss: "; + BI->dump(); + } + if (EnableFastISelAbort) + // The "fast" selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + assert(0 && "FastISel didn't select the entire block"); + } + break; + } + } + + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + if (BI != End) { + // If FastISel is run and it has known DebugLoc then use it. 
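The loop above is the heart of the mixed fast/DAG strategy: each instruction is offered first to the tablegen-generated FastISel path, then to the target's FastISel hook, then calls are handed to a single-instruction SelectionDAG, and anything else ends FastISel for the block so the remainder is selected by the DAG path. A stripped-down sketch of that fall-through control flow (the try*/isCall helpers are invented placeholders for the calls above):

#include <cstdio>

// Invented placeholders for FastIS->SelectInstruction,
// FastIS->TargetSelectInstruction, and the single-instruction DAG path.
static bool tryFastISel(int I)   { return I % 3 == 0; }
static bool tryTargetISel(int I) { return I % 3 == 1; }
static bool isCall(int I)        { return I == 2; }

int main() {
  const int NumInsts = 9;
  for (int BI = 0; BI != NumInsts; ++BI) {
    if (tryFastISel(BI))   { std::printf("%d: fast isel\n", BI);          continue; }
    if (tryTargetISel(BI)) { std::printf("%d: target fast isel\n", BI);   continue; }
    if (isCall(BI))        { std::printf("%d: single-instruction DAG\n", BI); continue; }
    // Anything else: give up on FastISel and let SelectionDAG finish the
    // rest of the block, as SelectBasicBlock(LLVMBB, BI, End) does above.
    std::printf("%d: fall back to SelectionDAG for the rest\n", BI);
    break;
  }
  return 0;
}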
+ if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) + SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); + SelectBasicBlock(LLVMBB, BI, End); + } + + FinishBasicBlock(); + } + + delete FastIS; +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DOUT << "Target-post-processed machine code:\n"; + DEBUG(BB->dump()); + + DOUT << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) + DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + if (SDL->SwitchCases.empty() && + SDL->JTCases.empty() && + SDL->BitTestCases.empty()) { + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + SDL->PHINodesToUpdate.clear(); + return; + } + + for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Parent; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitBitTestHeader(SDL->BitTestCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->BitTestCases[i].Cases[j].ThisBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + if (j+1 != ej) + SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + else + SDL->visitBitTestCase(SDL->BitTestCases[i].Default, + SDL->BitTestCases[i].Reg, + SDL->BitTestCases[i].Cases[j]); + + + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDL->BitTestCases[i].Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent)); + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases. + back().ThisBB)); + } + // One of "cases" BB. 
+ for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB; + if (cBB->succ_end() != + std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(cBB)); + } + } + } + } + SDL->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. + // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDL->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].first.HeaderBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + } + + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->JTCases[i].second.MBB; + SDL->setCurrentBasicBlock(BB); + // Emit the code + SDL->visitJumpTable(SDL->JTCases[i].second); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDL->JTCases[i].second.Default) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + } + // JT BB. Just iterate over successors here + if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + } + SDL->JTCases.clear(); + + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + false)); + PHI->addOperand(MachineOperand::CreateMBB(BB)); + } + } + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + BB = SDL->SwitchCases[i].ThisBB; + SDL->setCurrentBasicBlock(BB); + + // Emit the code + SDL->visitSwitchCase(SDL->SwitchCases[i]); + CurDAG->setRoot(SDL->getRoot()); + CodeGenAndEmitDAG(); + SDL->clear(); + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. 
We have to be very careful to + // handle them the right number of times. + while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + for (MachineBasicBlock::iterator Phi = BB->begin(); + Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ + // This value for this PHI node is recorded in PHINodesToUpdate, get it. + for (unsigned pn = 0; ; ++pn) { + assert(pn != SDL->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (SDL->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. + second, false)); + Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + break; + } + } + } + + // Don't process RHS if same block as LHS. + if (BB == SDL->SwitchCases[i].FalseBB) + SDL->SwitchCases[i].FalseBB = 0; + + // If we haven't handled the RHS, do so now. Otherwise, we're done. + SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; + SDL->SwitchCases[i].FalseBB = 0; + } + assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + } + SDL->SwitchCases.clear(); + + SDL->PHINodesToUpdate.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); + + if (!Ctor) { + Ctor = ISHeuristic; + RegisterScheduler::setDefault(Ctor); + } + + return Ctor(this, OptLevel); +} + +ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { + return new ScheduleHazardRecognizer(); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). 
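+// A small worked example of the idea, with illustrative numbers only: suppose
+// the pattern asks for (or X, 255) but the combiner rewrote the node to
+// (or X, 15) because bits 4-7 of X are already known to be one. Then
+// ActualMask = 15, DesiredMask = 255 and NeededMask = 240; ComputeMaskedBits
+// reports those bits as known one, so the match below still succeeds.
+// CheckAndMask above is the mirror image: the missing mask bits must instead
+// be known zero.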
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + + // If all the missing bits in the or are already known to be set, match! + if ((NeededMask & KnownOne) == NeededMask) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + + +/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated +/// by tblgen. Others should not call it. +void SelectionDAGISel:: +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { + std::vector<SDValue> InOps; + std::swap(InOps, Ops); + + Ops.push_back(InOps[0]); // input chain. + Ops.push_back(InOps[1]); // input asm string. + + unsigned i = 2, e = InOps.size(); + if (InOps[e-1].getValueType() == MVT::Flag) + --e; // Don't process a flag operand if it is here. + + while (i != e) { + unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); + if ((Flags & 7) != 4 /*MEM*/) { + // Just skip over this operand, copying the operands verbatim. + Ops.insert(Ops.end(), InOps.begin()+i, + InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); + i += InlineAsm::getNumOperandRegisters(Flags) + 1; + } else { + assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + "Memory operand with multiple values?"); + // Otherwise, this is a memory operand. Ask the target to select it. + std::vector<SDValue> SelOps; + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { + cerr << "Could not match memory address. Inline asm failure!\n"; + exit(1); + } + + // Add this to the output node. + MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), + IntPtrTy)); + Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + i += 2; + } + } + + // Add the flag input back if present. + if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// SDNode. +/// +static SDNode *findFlagUse(SDNode *N) { + unsigned FlagResNo = N->getNumValues()-1; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDUse &Use = I.getUse(); + if (Use.getResNo() == FlagResNo) + return Use.getUser(); + } + return NULL; +} + +/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". +/// This function recursively traverses up the operand chain, ignoring +/// certain nodes. 
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, + SDNode *Root, + SmallPtrSet<SDNode*, 16> &Visited) { + if (Use->getNodeId() < Def->getNodeId() || + !Visited.insert(Use)) + return false; + + for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + SDNode *N = Use->getOperand(i).getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } + + // Traverse up the operand chain. + if (findNonImmUse(N, Def, ImmedUse, Root, Visited)) + return true; + } + return false; +} + +/// isNonImmUse - Start searching from Root up the DAG to check is Def can +/// be reached. Return true if that's the case. However, ignore direct uses +/// by ImmedUse (which would be U in the example illustrated in +/// IsLegalAndProfitableToFold) and by Root (which can happen in the store +/// case). +/// FIXME: to be really generic, we should allow direct use by any node +/// that is being folded. But realisticly since we only fold loads which +/// have one non-chain use, we only need to watch out for load/op/store +/// and load/op/cmp case where the root (store / cmp) may reach the load via +/// its chain operand. +static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { + SmallPtrSet<SDNode*, 16> Visited; + return findNonImmUse(Root, Def, ImmedUse, Root, Visited); +} + +/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root and +/// folding N is profitable. +bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + + // If Root use can somehow reach N through a path that that doesn't contain + // U then folding N would create a cycle. e.g. In the following + // diagram, Root can reach N through X. If N is folded into into Root, then + // X is both a predecessor and a successor of U. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ / // + // \ / // + // [Root*] // + // + // * indicates nodes to be folded together. + // + // If Root produces a flag, then it gets (even more) interesting. Since it + // will be "glued" together with its flag use in the scheduler, we need to + // check if it might reach N. + // + // [N*] // + // ^ ^ // + // / \ // + // [U*] [X]? // + // ^ ^ // + // \ \ // + // \ | // + // [Root*] | // + // ^ | // + // f | // + // | / // + // [Y] / // + // ^ / // + // f / // + // | / // + // [FU] // + // + // If FU (flag use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of FU and a successor of + // Fold. But since Fold and FU are flagged together, this will create + // a cycle in the scheduling graph. 
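+  // As a rough usage sketch (the caller names here are hypothetical, not part
+  // of this file): a target matching a read-modify-write sequence such as
+  // (store (add (load X), 1), X) would pass N = the load, U = the add and
+  // Root = the store, and would typically guard the fold with something like
+  //
+  //   if (!IsLegalAndProfitableToFold(Load.getNode(), Add.getNode(), Store))
+  //     return false;  // folding the load could create a cycle as shown above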
+ + MVT VT = Root->getValueType(Root->getNumValues()-1); + while (VT == MVT::Flag) { + SDNode *FU = findFlagUse(Root); + if (FU == NULL) + break; + Root = FU; + VT = Root->getValueType(Root->getNumValues()-1); + } + + return !isNonImmUse(Root, N, U); +} + + +char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 000000000000..3eec684c6f8c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,416 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getMVTString(); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method is + /// implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. + template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SDNode *Node, + const SelectionDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
+ template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + MVT VT = Op.getValueType(); + if (VT == MVT::Flag) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getNodeLabel(const SDNode *Node, + const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Op = Node->getOperationName(G); + + if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) { + Op += ": " + utostr(CSDN->getZExtValue()); + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) { + Op += ": " + ftostr(CSDN->getValueAPF()); + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(Node)) { + Op += ": " + GADN->getGlobal()->getName(); + if (int64_t Offset = GADN->getOffset()) { + if (Offset > 0) + Op += "+" + itostr(Offset); + else + Op += itostr(Offset); + } + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) { + Op += " " + itostr(FIDN->getIndex()); + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) { + Op += " " + itostr(JTDN->getIndex()); + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){ + if (CP->isMachineConstantPoolEntry()) { + Op += '<'; + { + raw_string_ostream OSS(Op); + OSS << *CP->getMachineCPVal(); + } + Op += '>'; + } else { + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + Op += "<" + ftostr(CFP->getValueAPF()) + ">"; + else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal())) + Op += "<" + utostr(CI->getZExtValue()) + ">"; + else { + Op += '<'; + { + raw_string_ostream OSS(Op); + WriteAsOperand(OSS, CP->getConstVal(), false); + } + Op += '>'; + } + } + Op += " A=" + itostr(CP->getAlignment()); + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) { + Op = "BB: "; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + Op += LBB->getName(); + //Op += " " + (const void*)BBDN->getBasicBlock(); + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) { + if (G && R->getReg() != 0 && + TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Op = Op + " " + + G->getTarget().getRegisterInfo()->getName(R->getReg()); + } else { + Op += " #" + utostr(R->getReg()); + } + } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) { + DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit())); + std::string FN; + Op += ": " + CU.getFilename(FN); + Op += ":" + utostr(D->getLine()); + if (D->getColumn() != 0) + Op += ":" + utostr(D->getColumn()); + } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) { + Op += ": LabelID=" + utostr(L->getLabelID()); + } else if (const 
CallSDNode *C = dyn_cast<CallSDNode>(Node)) { + Op += ": CallingConv=" + utostr(C->getCallingConv()); + if (C->isVarArg()) + Op += ", isVarArg"; + if (C->isTailCall()) + Op += ", isTailCall"; + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(Node)) { + Op += "'" + std::string(ES->getSymbol()) + "'"; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) { + if (M->getValue()) + Op += "<" + M->getValue()->getName() + ">"; + else + Op += "<null>"; + } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) { + const Value *V = M->MO.getValue(); + Op += '<'; + if (!V) { + Op += "(unknown)"; + } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // PseudoSourceValues don't have names, so use their print method. + raw_string_ostream OSS(Op); + PSV->print(OSS); + } else { + Op += V->getName(); + } + Op += '+' + itostr(M->MO.getOffset()) + '>'; + } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) { + Op = Op + " AF=" + N->getArgFlags().getArgFlagsString(); + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) { + Op = Op + " VT=" + N->getVT().getMVTString(); + } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) { + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: + Op = Op + "<anyext "; + break; + case ISD::SEXTLOAD: + Op = Op + " <sext "; + break; + case ISD::ZEXTLOAD: + Op = Op + " <zext "; + break; + } + if (doExt) + Op += LD->getMemoryVT().getMVTString() + ">"; + if (LD->isVolatile()) + Op += "<V>"; + Op += LD->getIndexedModeName(LD->getAddressingMode()); + if (LD->getAlignment() > 1) + Op += " A=" + utostr(LD->getAlignment()); + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) { + if (ST->isTruncatingStore()) + Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">"; + if (ST->isVolatile()) + Op += "<V>"; + Op += ST->getIndexedModeName(ST->getAddressingMode()); + if (ST->getAlignment() > 1) + Op += " A=" + utostr(ST->getAlignment()); + } + +#if 0 + Op += " Id=" + itostr(Node->getNodeId()); +#endif + + return Op; +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + Title); +#else + cerr << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) +/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) 
+/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(""); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + cerr << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DOUT << "setSubgraphColor hit max level\n"; + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } + else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG); + FlaggedNodes.pop_back(); + if (!FlaggedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. 
+ GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 000000000000..3334e53f0fbc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,2592 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/GlobalVariable.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace llvm { +TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (reloc == Reloc::PIC_) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} +} + +/// InitLibcallNames - Set default libcall names. 
+/// +static void InitLibcallNames(const char **Names) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + 
Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + 
Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
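+// For example, using the name table filled in by InitLibcallNames above:
+// getFPTOUINT(MVT::f64, MVT::i32) returns FPTOUINT_F64_I32, which corresponds
+// to the "__fixunsdfsi" libcall; a source/result pairing with no table entry
+// simply falls through to UNKNOWN_LIBCALL.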
+RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. 
+/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +TargetLowering::TargetLowering(TargetMachine &tm) + : TM(tm), TD(TM.getTargetData()) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(ConvertActions, 0, sizeof(ConvertActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use addLegalFPImmediate + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). 
+ setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + UsesGlobalOffsetTable = false; + ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); + ShiftAmtHandling = Undefined; + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + allowUnalignedMemoryAccesses = false; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = SchedulingForLatency; + JumpBufSize = 0; + JumpBufAlignment = 0; + IfCvtBlockSizeLimit = 2; + IfCvtDupBlockSizeLimit = 0; + PrefLoopAlignment = 0; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + + // Tell Legalize whether the assembler supports DEBUG_LOC. + const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + if (!TASM || !TASM->hasDotLocAndDotFile()) + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); +} + +TargetLowering::~TargetLowering() {} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= 32 && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + MVT EVT = (MVT::SimpleValueType)ExpandedReg; + if (!EVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(EVT, Expand); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + } + + // Decide how to handle f64. 
If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (!isTypeLegal(VT)) { + MVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdown(VT, + IntermediateVT, NumIntermediates, + RegisterVT); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (!IsLegalWiderType) { + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); + } + } + } + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT TargetLowering::getSetCCResultType(MVT VT) const { + return getValueType(TD->getIntPtrType()); +} + + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, + MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. 
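+  // Illustrative walk-through of the common power-of-2 case: for MVT::v8f32
+  // on a target where v4f32 is the widest legal vector type, the loop below
+  // halves NumElts until v4f32 is reached, leaving NumVectorRegs = 2; the
+  // result is IntermediateVT = RegisterVT = v4f32 and a return value of 2,
+  // matching the v8f32 -> 2 x v4f32 example in the comment above.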
+ if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(NewVT); + RegisterVT = DestVT; + if (DestVT.bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} + +/// getWidenVectorType: given a vector type, returns the type to widen to +/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. +/// If there is no vector type that we want to widen to, returns MVT::Other +/// When and where to widen is target dependent based on the cost of +/// scalarizing vs using the wider vector type. +MVT TargetLowering::getWidenVectorType(MVT VT) const { + assert(VT.isVector()); + if (isTypeLegal(VT)) + return VT; + + // Default is not to widen until moved to LegalizeTypes + return MVT::Other; +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + if (usesGlobalOffsetTable()) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; +} + +bool +TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // Assume that everything is safe in static mode. + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return true; + + // In dynamic-no-pic mode, assume that known defined values are safe. + if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && + GA && + !GA->getGlobal()->isDeclaration() && + !GA->getGlobal()->isWeakForLinker()) + return true; + + // Otherwise assume nothing is safe. + return false; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. 
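+// For instance, with illustrative values: in (and X, 0xFF) where only the low
+// four bits are demanded, the constant has bits set outside the demanded mask,
+// so it is replaced below with Demanded & C, i.e. the node becomes
+// (and X, 0xF).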
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + DebugLoc dl = Op.getDebugLoc(); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // if we can expand it to have all bits set, do it + if (C->getAPIntValue().intersects(~Demanded)) { + MVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. + SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + DebugLoc dl = Op.getDebugLoc(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. 
+ if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; + KnownZero = ~KnownOne & NewMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, + LHSZero, LHSOne, Depth+1); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. + if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. 
+ KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + MVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + MVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. 
+ if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + MVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. 
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getSizeInBits()) & + NewMask; + + // If none of the extended bits are demanded, eliminate the sextinreg. + if (NewBits == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); + InSignBit.zext(BitWidth); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, + EVT.getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + MVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. 
+ if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); + APInt InMask = NewMask; + InMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + APInt TruncMask = NewMask; + TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + unsigned InBitWidth = In.getValueSizeInBits(); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ + APInt HighBits = APInt::getHighBitsSet(InBitWidth, + InBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); + } + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BIT_CONVERT: +#if 0 + // If this is an FP->Int bitcast and if the sign bit is the only thing that + // is demanded, turn this into a FGETSIGN. 
+ if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && + !MVT::isVector(Op.getOperand(0).getValueType())) { + // Only do this xform if FGETSIGN is valid or if before legalize. + if (!TLO.AfterLegalize || + isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + Op.getOperand(0)); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(), + Sign, ShAmt)); + } + } +#endif + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use ComputeMaskedBits to compute output bits. + TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((NewMask & (KnownZero|KnownOne)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + + return false; +} + +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); +} + +/// ComputeNumSignBitsForTargetNode - This method can be implemented by +/// targets that want to expose additional information about sign bits to the +/// DAG Combiner. +unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly +/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// determine which bit is set. +/// +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. 
+ if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to ComputeMaskedBits to catch other known cases. + MVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// and cc. If it is unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, DebugLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: return DAG.getConstant(1, VT); + } + + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + if (isa<ConstantSDNode>(N0.getNode())) { + return DAG.FoldSetCC(VT, N0, N1, Cond, dl); + } else { + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + MVT newVT = MVT::getIntegerVT(bestWidth); + if (newVT.isRound()) { + MVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), + N0.getOperand(0).getValueType()), + Cond); + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + MVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. 
+ APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); + + SDValue ZextOp; + MVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + MVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); + } + } + } + } + } else if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + + if (isa<ConstantFPSDNode>(N0.getNode())) { + // Constant fold or commute setcc. + SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); + if (O.getNode()) return O; + } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + // If the RHS of an FP comparison is a constant, simplify it away in + // some cases. + if (CFP->getValueAPF().isNaN()) { + // If an operand is known to be a nan, we can fold it. + switch (ISD::getUnorderedFlavor(Cond)) { + default: assert(0 && "Unknown flavor!"); + case 0: // Known false. + return DAG.getConstant(0, VT); + case 1: // Known true. 
+ return DAG.getConstant(1, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + } + + if (N0 == N1) { + // We can always fold X == X for integer setcc's. + if (N0.getValueType().isInteger()) + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(UOF, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getValueType().isInteger()) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. + if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + Cond); + } + } + + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. 
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + N0.getValueType()), + Cond); + } + } + } + + // Simplify (X+Z) == X --> Z == 0 + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), + N1, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) { + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) { + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, N1.getValueType()), Cond); + } else if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, getShiftAmountTy())); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. + if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + + // Fold away ALL boolean setcc's. 
+ SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: assert(0 && "Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the +/// node is a GlobalAddress + offset. +bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, + int64_t &Offset) const { + if (isa<GlobalAddressSDNode>(N)) { + GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); + if (V) { + Offset += V->getSExtValue(); + return true; + } + } + } + return false; +} + + +/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is +/// loading 'Bytes' bytes from a location that is 'Dist' units away from the +/// location that the 'Base' load is loading from. 
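Illustrative sketch (not part of the committed diff): the "consecutive load" test that follows boils down to simple offset arithmetic; the frame-index and global-address plumbing of the real code is elided here and the offsets are invented. A load of Bytes bytes is at distance Dist from a base load when its address equals the base address plus Dist*Bytes.

#include <cassert>
#include <cstdint>

// Two loads of the same width are "consecutive at distance Dist" when the
// second one starts exactly Dist * Bytes past the first.
bool isConsecutiveAt(int64_t BaseOffset, int64_t Offset,
                     unsigned Bytes, int Dist) {
  return Offset == BaseOffset + int64_t(Dist) * Bytes;
}

int main() {
  // A 4-byte load at offset 20 sits one slot after a 4-byte load at offset 16,
  assert(isConsecutiveAt(16, 20, 4, 1));
  // ...and a load at offset 8 sits two slots before it.
  assert(isConsecutiveAt(16, 8, 4, -2));
  assert(!isConsecutiveAt(16, 21, 4, 1)); // off by one byte: not consecutive
  return 0;
}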
+bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, + unsigned Bytes, int Dist, + const MachineFrameInfo *MFI) const { + if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode()) + return false; + MVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + GlobalValue *GV1 = NULL; + GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +SDValue TargetLowering:: +PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(const std::string &Constraint) const { + // FIXME: lots more standard ones to handle. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'r': return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': // Relocatable Constant + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + return C_Other; + } + } + + if (Constraint.size() > 1 && Constraint[0] == '{' && + Constraint[Constraint.size()-1] == '}') + return C_Register; + return C_Unknown; +} + +/// LowerXConstraint - try to replace an X constraint, which matches anything, +/// with another that has more specific requirements based on the type of the +/// corresponding operand. +const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{ + if (ConstraintVT.isInteger()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "f"; // works for many targets + return 0; +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + switch (ConstraintLetter) { + default: break; + case 'X': // Allows any operand; labels (basic block) use this. + if (Op.getOpcode() == ISD::BasicBlock) { + Ops.push_back(Op); + return; + } + // fall through + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': { // Relocatable Constant + // These operands are interested in values of the form (GV+C), where C may + // be folded in as an offset of GV, or it may be explicitly added. Also, it + // is possible and fine if either GV or C are missing. 
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + + // If we have "(add GV, C)", pull out GV/C + if (Op.getOpcode() == ISD::ADD) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); + if (C == 0 || GA == 0) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); + } + if (C == 0 || GA == 0) + C = 0, GA = 0; + } + + // If we find a valid operand, map to the TargetXXX version so that the + // value itself doesn't get selected. + if (GA) { // Either &GV or &GV+C + if (ConstraintLetter != 'n') { + int64_t Offs = GA->getOffset(); + if (C) Offs += C->getZExtValue(); + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Op.getValueType(), Offs)); + return; + } + } + if (C) { // just C, no GV. + // Simple constants are not allowed for 's'. + if (ConstraintLetter != 's') { + // gcc prints these as sign extended. Sign extend value to 64 bits + // now; without this it would get ZExt'd later in + // ScheduleDAGSDNodes::EmitNode, which is very generic. + Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + MVT::i64)); + return; + } + } + break; + } + } +} + +std::vector<unsigned> TargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + return std::vector<unsigned>(); +} + + +std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint[0] != '{') + return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); + + // Remove the braces from around the name. + std::string RegName(Constraint.begin()+1, Constraint.end()-1); + + // Figure out which register class contains this reg. + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), + E = RI->regclass_end(); RCI != E; ++RCI) { + const TargetRegisterClass *RC = *RCI; + + // If none of the the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + bool isLegal = false; + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) { + isLegal = true; + break; + } + } + + if (!isLegal) continue; + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) { + if (StringsEqualNoCase(RegName, RI->get(*I).AsmName)) + return std::make_pair(*I, RC); + } + } + + return std::pair<unsigned, const TargetRegisterClass*>(0, 0); +} + +//===----------------------------------------------------------------------===// +// Constraint Selection. + +/// isMatchingInputConstraint - Return true of this is an input operand that is +/// a matching constraint like "4". +bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return isdigit(ConstraintCode[0]); +} + +/// getMatchedOperand - If this is an input matching constraint, this method +/// returns the output operand it matches. +unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return atoi(ConstraintCode.c_str()); +} + + +/// getConstraintGenerality - Return an integer indicating how general CT +/// is. 
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + default: assert(0 && "Unknown constraint type!"); + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } +} + +/// ChooseConstraint - If there are multiple different constraints that we +/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// This is somewhat tricky: constraints fall into four classes: +/// Other -> immediates and magic values +/// Register -> one specific register +/// RegisterClass -> a group of regs +/// Memory -> memory +/// Ideally, we would pick the most specific constraint possible: if we have +/// something that fits into a register, we would pick it. The problem here +/// is that if we have something that could either be in a register or in +/// memory that use of the register could cause selection of *other* +/// operands to fail: they might only succeed if we pick memory. Because of +/// this the heuristic we use is: +/// +/// 1) If there is an 'other' constraint, and if the operand is valid for +/// that constraint, use it. This makes us take advantage of 'i' +/// constraints when available. +/// 2) Otherwise, pick the most general constraint present. This prefers +/// 'm' over 'r', for example. +/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + bool hasMemory, const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' constraint, see if the operand is valid for it. + // For example, on X86 we might have an 'rI' constraint. If the operand + // is an integer in the range [0..31] we want to use I (saving a load + // of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// ComputeConstraintToUse - Determines the constraint code and constraint +/// type to use for the specific AsmOperandInfo, setting +/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + bool hasMemory, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. 
+ if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). + if (isa<BasicBlock>(OpInfo.CallOperandVal) || + isa<ConstantInt>(OpInfo.CallOperandVal)) + return; + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } + } +} + +//===----------------------------------------------------------------------===// +// Loop Strength Reduction hooks +//===----------------------------------------------------------------------===// + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // The default implementation of this implements a conservative RISCy, r+r and + // r+i addr mode. + + // Allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Only support r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + } + + return true; +} + +/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. See: +/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, + std::vector<SDNode*>* Created) const { + MVT VT = N->getValueType(0); + DebugLoc dl= N->getDebugLoc(); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + APInt::ms magics = d.magic(); + + // Multiply the numerator (operand 0) by the magic value + // FIXME: We should support doing a MUL in a wider type + SDValue Q; + if (isOperationLegalOrCustom(ISD::MULHS, VT)) + Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), + DAG.getConstant(magics.m, VT)); + else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), + N->getOperand(0), + DAG.getConstant(magics.m, VT)).getNode(), 1); + else + return SDValue(); // No mulhs or equvialent + // If d > 0 and m < 0, add the numerator + if (d.isStrictlyPositive() && magics.m.isNegative()) { + Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); + if (Created) + Created->push_back(Q.getNode()); + } + // If d < 0 and m > 0, subtract the numerator. 
+  if (d.isNegative() && magics.m.isStrictlyPositive()) {
+    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Shift right algebraic if the shift value is nonzero.
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+                    DAG.getConstant(magics.s, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Extract the sign bit and add it to the quotient.
+  SDValue T =
+    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+                                                     getShiftAmountTy()));
+  if (Created)
+    Created->push_back(T.getNode());
+  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  MVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  APInt::mu magics = N1C->getAPIntValue().magicu();
+
+  // Multiply the numerator (operand 0) by the magic value.
+  // FIXME: We should support doing a MUL in a wider type.
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                            N->getOperand(0),
+                            DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhu or equivalent.
+  if (Created)
+    Created->push_back(Q.getNode());
+
+  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
+                       DAG.getConstant(magics.s, getShiftAmountTy()));
+  } else {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                      DAG.getConstant(1, getShiftAmountTy()));
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
+  }
+}
+
+/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET
+/// node that don't prevent tail call optimization.
+static SDValue IgnoreHarmlessInstructions(SDValue node) {
+  // Found the call return.
+  if (node.getOpcode() == ISD::CALL) return node;
+  // Ignore MERGE_VALUES.  It has at least one operand.
+  if (node.getOpcode() == ISD::MERGE_VALUES)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Ignore ANY_EXTEND nodes.
+  if (node.getOpcode() == ISD::ANY_EXTEND)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Ignore TRUNCATE nodes.
+  if (node.getOpcode() == ISD::TRUNCATE)
+    return IgnoreHarmlessInstructions(node.getOperand(0));
+  // Any other node type.
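+  // Returning it unchanged means the caller's comparison against the CALL
+  // node only succeeds if the return value really does come straight from
+  // the call.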
+  return node;
+}
+
+bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
+                                                    SDValue Ret) {
+  unsigned NumOps = Ret.getNumOperands();
+  // ISD::CALL results:  (value0, ..., valuen, chain)
+  // ISD::RET  operands: (chain, value0, flag0, ..., valuen, flagn)
+  // Value return:
+  // Check that the operand of the RET node sources from the CALL node.  The
+  // RET node has at least two operands: operand 0 holds the chain and
+  // operand 1 holds the value.
+  if (NumOps > 1 &&
+      IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+    return true;
+  // Void return: the RET node has the chain result value of the CALL node as
+  // input.
+  if (NumOps == 1 &&
+      Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
+    return true;
+
+  return false;
+}
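
For reference, the multiply-by-magic-number transformation that BuildSDIV emits as MULHS/ADD/SRA/SRL/ADD nodes can be sketched as ordinary scalar C++. The sketch below is an editorial illustration, not part of this commit: it hard-codes the well-known 32-bit magic constant and shift for signed division by 7 (m = 0x92492493, s = 2, from Hacker's Delight) instead of calling APInt::magic(), and it assumes the usual arithmetic right shift of negative values.

#include <cassert>
#include <cstdint>
#include <cstdio>

// High 32 bits of the signed 64-bit product -- what ISD::MULHS computes.
static int32_t mulhs(int32_t a, int32_t b) {
  return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}

// Signed division by the constant 7 using the node sequence BuildSDIV would
// emit for a 32-bit type.  Magic value and shift are the standard divide-by-7
// constants; they are an assumption of this sketch, not taken from the patch.
static int32_t sdiv7(int32_t n) {
  const int32_t Magic = (int32_t)0x92492493u;   // m (negative as a signed value)
  const unsigned Shift = 2;                     // s

  int32_t q = mulhs(n, Magic);                  // ISD::MULHS
  q += n;                                       // d > 0 and m < 0: add numerator
  q >>= Shift;                                  // ISD::SRA (arithmetic shift)
  q += (int32_t)((uint32_t)q >> 31);            // ISD::SRL + ISD::ADD: add sign bit
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(sdiv7(n) == n / 7);                  // matches truncating division
  std::printf("magic-number sdiv by 7 agrees with n/7 on [-1000, 1000]\n");
  return 0;
}

BuildUDIV follows the same shape with MULHU: when the magic computation needs no add indicator the result is a single multiply-high plus logical shift, otherwise the extra subtract/halve/add fixup shown in its else branch is emitted before the final shift.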