| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2013-04-08 18:41:23 +0000 |
| commit | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 | |
| tree | 06099edc18d30894081a822b756f117cbe0b8207 /lib/Target/R600/AMDILISelLowering.cpp | |
| parent | 482e7bddf617ae804dc47133cb07eb4aa81e45de | |
Diffstat (limited to 'lib/Target/R600/AMDILISelLowering.cpp')
| mode | path | insertions |
|---|---|---|
| -rw-r--r-- | lib/Target/R600/AMDILISelLowering.cpp | 647 |

1 file changed, 647 insertions(+), 0 deletions(-)
```diff
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 000000000000..922cac12b98e
--- /dev/null
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,647 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+  int types[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::f32,
+    (int)MVT::f64,
+    (int)MVT::i64,
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+
+  int IntTypes[] = {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::i64
+  };
+
+  int FloatTypes[] = {
+    (int)MVT::f32,
+    (int)MVT::f64
+  };
+
+  int VectorTypes[] = {
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+  size_t NumTypes = sizeof(types) / sizeof(*types);
+  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+  // These are the current register classes that are
+  // supported
+
+  for (unsigned int x  = 0; x < NumTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+    // We cannot sextinreg, expand to shifts
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Expand);
+    setOperationAction(ISD::SUBC, VT, Expand);
+    setOperationAction(ISD::ADDE, VT, Expand);
+    setOperationAction(ISD::ADDC, VT, Expand);
+    setOperationAction(ISD::BRCOND, VT, Custom);
+    setOperationAction(ISD::BR_JT, VT, Expand);
+    setOperationAction(ISD::BRIND, VT, Expand);
+    // TODO: Implement custom UREM/SREM routines
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    if (VT != MVT::i64 && VT != MVT::v2i64) {
+      setOperationAction(ISD::SDIV, VT, Custom);
+    }
+  }
+  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+    // IL does not have these operations for floating point types
+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+    setOperationAction(ISD::SETOLT, VT, Expand);
+    setOperationAction(ISD::SETOGE, VT, Expand);
+    setOperationAction(ISD::SETOGT, VT, Expand);
+    setOperationAction(ISD::SETOLE, VT, Expand);
+    setOperationAction(ISD::SETULT, VT, Expand);
+    setOperationAction(ISD::SETUGE, VT, Expand);
+    setOperationAction(ISD::SETUGT, VT, Expand);
+    setOperationAction(ISD::SETULE, VT, Expand);
+  }
+
+  for (unsigned int x = 0; x < NumIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // GPU also does not have divrem function for signed or unsigned
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+
+    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+    // GPU doesn't have a rotl, rotr, or byteswap instruction
+    setOperationAction(ISD::ROTR, VT, Expand);
+    setOperationAction(ISD::BSWAP, VT, Expand);
+
+    // GPU doesn't have any counting operators
+    setOperationAction(ISD::CTPOP, VT, Expand);
+    setOperationAction(ISD::CTTZ, VT, Expand);
+    setOperationAction(ISD::CTLZ, VT, Expand);
+  }
+
+  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+  }
+  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  }
+  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+    // we support loading/storing v2f64 but not operations on the type
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
+    // We want to expand vector conversions into their scalar
+    // counterparts.
+    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+  }
+  // TODO: Fix the UDIV24 algorithm so it works for these
+  // types correctly. This needs vector comparisons
+  // for this to work correctly.
+  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SUBC, MVT::Other, Expand);
+  setOperationAction(ISD::ADDE, MVT::Other, Expand);
+  setOperationAction(ISD::ADDC, MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+  // Use the default implementation.
+  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
+  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
+
+  setSchedulingPreference(Sched::RegPressure);
+  setPow2DivIsCheap(false);
+  setSelectIsExpensive(true);
+  setJumpIsExpensive(true);
+
+  MaxStoresPerMemcpy  = 4096;
+  MaxStoresPerMemmove = 4096;
+  MaxStoresPerMemset  = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+    const CallInst &I, unsigned Intrinsic) const {
+  return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+    return false;
+  } else {
+    return true;
+  }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+    const SDValue Op,
+    APInt &KnownZero,
+    APInt &KnownOne,
+    const SelectionDAG &DAG,
+    unsigned Depth) const {
+  APInt KnownZero2;
+  APInt KnownOne2;
+  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+  switch (Op.getOpcode()) {
+    default: break;
+    case ISD::SELECT_CC:
+             DAG.ComputeMaskedBits(
+                 Op.getOperand(1),
+                 KnownZero,
+                 KnownOne,
+                 Depth + 1
+                 );
+             DAG.ComputeMaskedBits(
+                 Op.getOperand(0),
+                 KnownZero2,
+                 KnownOne2
+                 );
+             assert((KnownZero & KnownOne) == 0
+                 && "Bits known to be one AND zero?");
+             assert((KnownZero2 & KnownOne2) == 0
+                 && "Bits known to be one AND zero?");
+             // Only known if known in both the LHS and RHS
+             KnownOne &= KnownOne2;
+             KnownZero &= KnownZero2;
+             break;
+  };
+}
+
+//===----------------------------------------------------------------------===//
+//                           Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSDIV32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16
+      || OVT.getScalarType() == MVT::i8) {
+    DST = LowerSDIV24(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSREM64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSREM32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16) {
+    DST = LowerSREM16(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i8) {
+    DST = LowerSREM8(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Data = Op.getOperand(0);
+  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+  DebugLoc DL = Op.getDebugLoc();
+  EVT DVT = Data.getValueType();
+  EVT BVT = BaseType->getVT();
+  unsigned baseBits = BVT.getScalarType().getSizeInBits();
+  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+  unsigned shiftBits = srcBits - baseBits;
+  if (srcBits < 32) {
+    // If the op is less than 32 bits, then it needs to extend to 32bits
+    // so it can properly keep the upper bits valid.
+    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+    shiftBits = 32 - baseBits;
+    DVT = IVT;
+  }
+  SDValue Shift = DAG.getConstant(shiftBits, DVT);
+  // Shift left by 'Shift' bits.
+  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+  // Signed shift Right by 'Shift' bits.
+  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+  if (srcBits < 32) {
+    // Once the sign extension is done, the op needs to be converted to
+    // its original type.
+    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+  }
+  return Data;
+}
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
+  int iSize = (size * numEle);
+  int vEle = (iSize >> ((size == 64) ? 6 : 5));
+  if (!vEle) {
+    vEle = 1;
+  }
+  if (size == 64) {
+    if (vEle == 1) {
+      return EVT(MVT::i64);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i64, vEle));
+    }
+  } else {
+    if (vEle == 1) {
+      return EVT(MVT::i32);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i32, vEle));
+    }
+  }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond  = Op.getOperand(1);
+  SDValue Jump  = Op.getOperand(2);
+  SDValue Result;
+  Result = DAG.getNode(
+      AMDGPUISD::BRANCH_COND,
+      Op.getDebugLoc(),
+      Op.getValueType(),
+      Chain, Jump, Cond);
+  return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+  // jq = (int)jq
+  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib, (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+      DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  SDValue cv;
+  if (INTTY == MVT::i32) {
+    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+  } else {
+    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+  }
+  // jq = (cv ? jq : 0);
+  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+      DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSDIV32 function generates equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r0, r0, r1
+  // ixor r10, r10, r11
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSelectCC(DL,
+      r0, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSelectCC(DL,
+      r1, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r0, r0, r1
+  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // ixor r10, r10, r11
+  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates equivalent to the following IL.
+  // mov r0, LHS
+  // mov r1, RHS
+  // ilt r10, r0, 0
+  // ilt r11, r1, 0
+  // iadd r0, r0, r10
+  // iadd r1, r1, r11
+  // ixor r0, r0, r10
+  // ixor r1, r1, r11
+  // udiv r20, r0, r1
+  // umul r20, r20, r1
+  // sub r0, r0, r20
+  // iadd r0, r0, r10
+  // ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+  return SDValue(Op.getNode(), 0);
+}
```
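Editor's note: `LowerSIGN_EXTEND_INREG` in the diff above expands the sign-extend-in-register node into a left shift followed by an arithmetic right shift, after widening sub-32-bit values to i32. The snippet below is a minimal standalone sketch of that shift-pair trick, not part of the commit; the function name and test values are ours, and it assumes `>>` on a negative `int32_t` shifts arithmetically (true on the relevant targets).

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar mirror of LowerSIGN_EXTEND_INREG for a value already
// widened to 32 bits; 'fromBits' plays the role of the VTSDNode's base width.
static int32_t sign_extend_in_reg_sketch(uint32_t value, unsigned fromBits) {
  unsigned shift = 32u - fromBits;              // shiftBits in the lowering
  return (int32_t)(value << shift) >> shift;    // ISD::SHL, then arithmetic ISD::SRA
}

int main() {
  assert(sign_extend_in_reg_sketch(0xFFu, 8) == -1);        // i8 0xFF sign-extends to -1
  assert(sign_extend_in_reg_sketch(0x7Fu, 8) == 127);
  assert(sign_extend_in_reg_sketch(0x8000u, 16) == -32768);
  return 0;
}
```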
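Editor's note: `LowerSDIV24` handles 8- and 16-bit signed division by converting both operands to float, truncating the float quotient, and adding a ±1 correction (`jq`) whenever the residual `|fa - fq*fb|` is at least `|fb|`. Below is a scalar C++ sketch of that scheme for i16 inputs, provided for illustration only (the name `sdiv24_sketch` and the test values are ours, not from the commit).

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>

// Hypothetical scalar mirror of the LowerSDIV24 scheme for i16 inputs.
static int16_t sdiv24_sketch(int16_t a, int16_t b) {
  // jq = ((a ^ b) >> (bitsize - 2)) | 1  ->  +1 or -1, the sign of the true quotient
  int32_t jq = ((int16_t)(a ^ b) >> 14) | 1;
  float fa = (float)a;            // int ia = (int)LHS; float fa = (float)ia;
  float fb = (float)b;
  float fq = truncf(fa / fb);     // fq = trunc(native_divide(fa, fb))
  float fr = -fq * fb + fa;       // fr = mad(fqneg, fb, fa)
  int32_t iq = (int32_t)fq;       // int iq = (int)fq;
  if (fabsf(fr) >= fabsf(fb))     // cv = fr >= fb (after fabs on both)
    iq += jq;                     // jq = cv ? jq : 0;  dst = iq + jq
  return (int16_t)iq;             // truncate back to the original type
}

int main() {
  assert(sdiv24_sketch(7, -2) == -3);
  assert(sdiv24_sketch(-32768, 3) == -10922);
  assert(sdiv24_sketch(32767, 7) == 4681);
  return 0;   // division by zero is not handled, matching the sketch's scope
}
```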
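Editor's note: `LowerSDIV32` reduces signed division to unsigned division: a select produces an all-ones mask for each negative operand, `(x + mask) ^ mask` takes the two's-complement absolute value, and the quotient is conditionally negated with the XOR of the two masks. A standalone sketch of the same identity over plain `uint32_t`, not part of the commit (INT_MIN / -1 and division by zero are left unhandled here):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical scalar mirror of the IL sequence in the LowerSDIV32 comment.
static int32_t sdiv32_sketch(int32_t lhs, int32_t rhs) {
  uint32_t r0 = (uint32_t)lhs;
  uint32_t r1 = (uint32_t)rhs;
  uint32_t r10 = (lhs < 0) ? 0xFFFFFFFFu : 0u;   // ilt r10, r0, 0
  uint32_t r11 = (rhs < 0) ? 0xFFFFFFFFu : 0u;   // ilt r11, r1, 0
  r0 = (r0 + r10) ^ r10;                         // iadd/ixor: r0 = |lhs|
  r1 = (r1 + r11) ^ r11;                         // iadd/ixor: r1 = |rhs|
  r0 = r0 / r1;                                  // udiv r0, r0, r1
  r10 = r10 ^ r11;                               // quotient is negative iff signs differ
  r0 = (r0 + r10) ^ r10;                         // iadd r0, r0, r10 ; ixor DST, r0, r10
  return (int32_t)r0;
}

int main() {
  assert(sdiv32_sketch(7, -2) == -3);
  assert(sdiv32_sketch(-7, -2) == 3);
  assert(sdiv32_sketch(-100, 7) == -14);
  return 0;
}
```

The IL comment block in `LowerSREM32` describes the analogous remainder sequence (udiv, umul, sub) with the final sign fixup taken from the dividend's mask only; the committed DAG code for that path is reproduced verbatim above and is not restated here.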
