| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
| commit | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 | |
| tree | 06099edc18d30894081a822b756f117cbe0b8207 /lib/Target/R600/AMDILISelLowering.cpp | |
| parent | 482e7bddf617ae804dc47133cb07eb4aa81e45de | |
Diffstat (limited to 'lib/Target/R600/AMDILISelLowering.cpp')
| mode | path | insertions |
|---|---|---|
| -rw-r--r-- | lib/Target/R600/AMDILISelLowering.cpp | 647 |

1 file changed, 647 insertions(+), 0 deletions(-)
```diff
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 000000000000..922cac12b98e
--- /dev/null
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,647 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+  int types[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::f32,
+    (int)MVT::f64,
+    (int)MVT::i64,
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+
+  int IntTypes[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::i64
+  };
+
+  int FloatTypes[] = {
+    (int)MVT::f32,
+    (int)MVT::f64
+  };
+
+  int VectorTypes[] = {
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+  size_t NumTypes = sizeof(types) / sizeof(*types);
+  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+  // These are the current register classes that are
+  // supported
+
+  for (unsigned int x  = 0; x < NumTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+    // We cannot sextinreg, expand to shifts
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Expand);
+    setOperationAction(ISD::SUBC, VT, Expand);
+    setOperationAction(ISD::ADDE, VT, Expand);
+    setOperationAction(ISD::ADDC, VT, Expand);
+    setOperationAction(ISD::BRCOND, VT, Custom);
+    setOperationAction(ISD::BR_JT, VT, Expand);
+    setOperationAction(ISD::BRIND, VT, Expand);
+    // TODO: Implement custom UREM/SREM routines
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    if (VT != MVT::i64 && VT != MVT::v2i64) {
+      setOperationAction(ISD::SDIV, VT, Custom);
+    }
+  }
+  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+    // IL does not have these operations for floating point types
+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+    setOperationAction(ISD::SETOLT, VT, Expand);
+    setOperationAction(ISD::SETOGE, VT, Expand);
+    setOperationAction(ISD::SETOGT, VT, Expand);
+    setOperationAction(ISD::SETOLE, VT, Expand);
+    setOperationAction(ISD::SETULT, VT, Expand);
+    setOperationAction(ISD::SETUGE, VT, Expand);
+    setOperationAction(ISD::SETUGT, VT, Expand);
+    setOperationAction(ISD::SETULE, VT, Expand);
+  }
+
+  for (unsigned int x = 0; x < NumIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // GPU also does not have divrem function for signed or unsigned
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+
+    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+    // GPU doesn't have a rotl, rotr, or byteswap instruction
+    setOperationAction(ISD::ROTR, VT, Expand);
+    setOperationAction(ISD::BSWAP, VT, Expand);
+
+    // GPU doesn't have any counting operators
+    setOperationAction(ISD::CTPOP, VT, Expand);
+    setOperationAction(ISD::CTTZ, VT, Expand);
+    setOperationAction(ISD::CTLZ, VT, Expand);
+  }
+
+  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+  }
+  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  }
+  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+    // we support loading/storing v2f64 but not operations on the type
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
+    // We want to expand vector conversions into their scalar
+    // counterparts.
+    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+  }
+  // TODO: Fix the UDIV24 algorithm so it works for these
+  // types correctly. This needs vector comparisons
+  // for this to work correctly.
+  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SUBC, MVT::Other, Expand);
+  setOperationAction(ISD::ADDE, MVT::Other, Expand);
+  setOperationAction(ISD::ADDC, MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+  // Use the default implementation.
+  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
+  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
+
+  setSchedulingPreference(Sched::RegPressure);
+  setPow2DivIsCheap(false);
+  setSelectIsExpensive(true);
+  setJumpIsExpensive(true);
+
+  MaxStoresPerMemcpy  = 4096;
+  MaxStoresPerMemmove = 4096;
+  MaxStoresPerMemset  = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+    const CallInst &I, unsigned Intrinsic) const {
+  return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return false;
+  } else {
+    return true;
+  }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+    const SDValue Op,
+    APInt &KnownZero,
+    APInt &KnownOne,
+    const SelectionDAG &DAG,
+    unsigned Depth) const {
+  APInt KnownZero2;
+  APInt KnownOne2;
+  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+  switch (Op.getOpcode()) {
+    default: break;
+    case ISD::SELECT_CC:
+             DAG.ComputeMaskedBits(
+                 Op.getOperand(1),
+                 KnownZero,
+                 KnownOne,
+                 Depth + 1
+                 );
+             DAG.ComputeMaskedBits(
+                 Op.getOperand(0),
+                 KnownZero2,
+                 KnownOne2
+                 );
+             assert((KnownZero & KnownOne) == 0
+                 && "Bits known to be one AND zero?");
+             assert((KnownZero2 & KnownOne2) == 0
+                 && "Bits known to be one AND zero?");
+             // Only known if known in both the LHS and RHS
+             KnownOne &= KnownOne2;
+             KnownZero &= KnownZero2;
+             break;
+  };
+}
+
+//===----------------------------------------------------------------------===//
+//                           Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSDIV32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16
+      || OVT.getScalarType() == MVT::i8) {
+    DST = LowerSDIV24(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSREM64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSREM32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16) {
+    DST = LowerSREM16(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i8) {
+    DST = LowerSREM8(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Data = Op.getOperand(0);
+  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+  DebugLoc DL = Op.getDebugLoc();
+  EVT DVT = Data.getValueType();
+  EVT BVT = BaseType->getVT();
+  unsigned baseBits = BVT.getScalarType().getSizeInBits();
+  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+  unsigned shiftBits = srcBits - baseBits;
+  if (srcBits < 32) {
+    // If the op is less than 32 bits, then it needs to extend to 32bits
+    // so it can properly keep the upper bits valid.
+    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+    shiftBits = 32 - baseBits;
+    DVT = IVT;
+  }
+  SDValue Shift = DAG.getConstant(shiftBits, DVT);
+  // Shift left by 'Shift' bits.
+  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+  // Signed shift Right by 'Shift' bits.
+  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+  if (srcBits < 32) {
+    // Once the sign extension is done, the op needs to be converted to
+    // its original type.
+    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+  }
+  return Data;
+}
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
+  int iSize = (size * numEle);
+  int vEle = (iSize >> ((size == 64) ? 6 : 5));
+  if (!vEle) {
+    vEle = 1;
+  }
+  if (size == 64) {
+    if (vEle == 1) {
+      return EVT(MVT::i64);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i64, vEle));
+    }
+  } else {
+    if (vEle == 1) {
+      return EVT(MVT::i32);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i32, vEle));
+    }
+  }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond  = Op.getOperand(1);
+  SDValue Jump  = Op.getOperand(2);
+  SDValue Result;
+  Result = DAG.getNode(
+      AMDGPUISD::BRANCH_COND,
+      Op.getDebugLoc(),
+      Op.getValueType(),
+      Chain, Jump, Cond);
+  return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+  // jq = (int)jq
+  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib, (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+      DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  SDValue cv;
+  if (INTTY == MVT::i32) {
+    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+  } else {
+    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+  }
+  // jq = (cv ? jq : 0);
+  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+      DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSDIV32 function generates equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r0, r0, r1
+  // ixor r10, r10, r11
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSelectCC(DL,
+      r0, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSelectCC(DL,
+      r1, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r0, r0, r1
+  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // ixor r10, r10, r11
+  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r20, r0, r1
+  // umul r20, r20, r1
+  // sub r0, r0, r20
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+  return SDValue(Op.getNode(), 0);
+}
```
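Editor's note: `LowerSIGN_EXTEND_INREG` in the diff above expands the sign-extend-in-register node into a left shift followed by an arithmetic right shift, after widening sub-32-bit values to i32. The snippet below is a minimal standalone sketch of that shift-pair trick, not part of the commit; the function name and test values are ours, and it assumes `>>` on a negative `int32_t` shifts arithmetically (true on the relevant targets).

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar mirror of LowerSIGN_EXTEND_INREG for a value already
// widened to 32 bits; 'fromBits' plays the role of the VTSDNode's base width.
static int32_t sign_extend_in_reg_sketch(uint32_t value, unsigned fromBits) {
  unsigned shift = 32u - fromBits;              // shiftBits in the lowering
  return (int32_t)(value << shift) >> shift;    // ISD::SHL, then arithmetic ISD::SRA
}

int main() {
  assert(sign_extend_in_reg_sketch(0xFFu, 8) == -1);        // i8 0xFF sign-extends to -1
  assert(sign_extend_in_reg_sketch(0x7Fu, 8) == 127);
  assert(sign_extend_in_reg_sketch(0x8000u, 16) == -32768);
  return 0;
}
```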
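Editor's note: `LowerSDIV24` handles 8- and 16-bit signed division by converting both operands to float, truncating the float quotient, and adding a ±1 correction (`jq`) whenever the residual `|fa - fq*fb|` is at least `|fb|`. Below is a scalar C++ sketch of that scheme for i16 inputs, provided for illustration only (the name `sdiv24_sketch` and the test values are ours, not from the commit).

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>

// Hypothetical scalar mirror of the LowerSDIV24 scheme for i16 inputs.
static int16_t sdiv24_sketch(int16_t a, int16_t b) {
  // jq = ((a ^ b) >> (bitsize - 2)) | 1  ->  +1 or -1, the sign of the true quotient
  int32_t jq = ((int16_t)(a ^ b) >> 14) | 1;
  float fa = (float)a;            // int ia = (int)LHS; float fa = (float)ia;
  float fb = (float)b;
  float fq = truncf(fa / fb);     // fq = trunc(native_divide(fa, fb))
  float fr = -fq * fb + fa;       // fr = mad(fqneg, fb, fa)
  int32_t iq = (int32_t)fq;       // int iq = (int)fq;
  if (fabsf(fr) >= fabsf(fb))     // cv = fr >= fb (after fabs on both)
    iq += jq;                     // jq = cv ? jq : 0;  dst = iq + jq
  return (int16_t)iq;             // truncate back to the original type
}

int main() {
  assert(sdiv24_sketch(7, -2) == -3);
  assert(sdiv24_sketch(-32768, 3) == -10922);
  assert(sdiv24_sketch(32767, 7) == 4681);
  return 0;   // division by zero is not handled, matching the sketch's scope
}
```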
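Editor's note: `LowerSDIV32` reduces signed division to unsigned division: a select produces an all-ones mask for each negative operand, `(x + mask) ^ mask` takes the two's-complement absolute value, and the quotient is conditionally negated with the XOR of the two masks. A standalone sketch of the same identity over plain `uint32_t`, not part of the commit (INT_MIN / -1 and division by zero are left unhandled here):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar mirror of the IL sequence in the LowerSDIV32 comment.
static int32_t sdiv32_sketch(int32_t lhs, int32_t rhs) {
  uint32_t r0 = (uint32_t)lhs;
  uint32_t r1 = (uint32_t)rhs;
  uint32_t r10 = (lhs < 0) ? 0xFFFFFFFFu : 0u;   // ilt r10, r0, 0
  uint32_t r11 = (rhs < 0) ? 0xFFFFFFFFu : 0u;   // ilt r11, r1, 0
  r0 = (r0 + r10) ^ r10;                         // iadd/ixor: r0 = |lhs|
  r1 = (r1 + r11) ^ r11;                         // iadd/ixor: r1 = |rhs|
  r0 = r0 / r1;                                  // udiv r0, r0, r1
  r10 = r10 ^ r11;                               // quotient is negative iff signs differ
  r0 = (r0 + r10) ^ r10;                         // iadd r0, r0, r10 ; ixor DST, r0, r10
  return (int32_t)r0;
}

int main() {
  assert(sdiv32_sketch(7, -2) == -3);
  assert(sdiv32_sketch(-7, -2) == 3);
  assert(sdiv32_sketch(-100, 7) == -14);
  return 0;
}
```

The IL comment block in `LowerSREM32` describes the analogous remainder sequence (udiv, umul, sub) with the final sign fixup taken from the dividend's mask only; the committed DAG code for that path is reproduced verbatim above and is not restated here.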
