aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/CellSPU
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/CellSPU')
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td24
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp44
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp105
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp142
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h6
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td156
-rw-r--r--lib/Target/CellSPU/SPUOperands.td6
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp11
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h5
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td2
10 files changed, 144 insertions, 357 deletions
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index ec2f663908f6..04fa2ae866d6 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,4 +1,4 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A>
// Return Value Calling Convention
//===----------------------------------------------------------------------===//
-// Return-value convention for Cell SPU: Everything can be passed back via $3:
+// Return-value convention for Cell SPU: return value to be passed in reg 3-74
def RetCC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3]>>,
- CCIfType<[i16], CCAssignToReg<[R3]>>,
- CCIfType<[i32], CCAssignToReg<[R3]>>,
- CCIfType<[i64], CCAssignToReg<[R3]>>,
- CCIfType<[i128], CCAssignToReg<[R3]>>,
- CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
- CCIfType<[v2i32], CCAssignToReg<[R3]>>
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
]>;
@@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[
R39, R40, R41, R42, R43, R44, R45, R46, R47,
R48, R49, R50, R51, R52, R53, R54, R55, R56,
R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9b8c2ddd0635..2f1598441f5a 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -41,13 +41,6 @@ using namespace llvm;
namespace {
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
- isI64IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
return isInt<10>(CN->getSExtValue());
@@ -67,14 +60,6 @@ namespace {
return isInt<10>(CN->getSExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(SDNode *N)
- {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- return (CN != 0 && isI16IntS10Immediate(CN));
- }
-
//! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
bool
isI16IntU10Immediate(ConstantSDNode *CN)
@@ -82,14 +67,6 @@ namespace {
return isUInt<10>((short) CN->getZExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntU10Immediate(SDNode *N)
- {
- return (N->getOpcode() == ISD::Constant
- && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
- }
-
//! ConstantSDNode predicate for signed 16-bit values
/*!
\arg CN The constant SelectionDAG node holding the value
@@ -119,14 +96,6 @@ namespace {
return false;
}
- //! SDNode predicate for signed 16-bit values.
- bool
- isIntS16Immediate(SDNode *N, short &Imm)
- {
- return (N->getOpcode() == ISD::Constant
- && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
- }
-
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
static bool
isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
@@ -142,16 +111,6 @@ namespace {
return false;
}
- bool
- isHighLow(const SDValue &Op)
- {
- return (Op.getOpcode() == SPUISD::IndirectAddr
- && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
- && Op.getOperand(1).getOpcode() == SPUISD::Lo)
- || (Op.getOperand(0).getOpcode() == SPUISD::Lo
- && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
- }
-
//===------------------------------------------------------------------===//
//! EVT to "useful stuff" mapping structure:
@@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
return true;
} else if (Opc == ISD::Register
||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF) {
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ece19b9b89f6..46f31899be0c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
@@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
if (alignment == 16) {
ConstantSDNode *CN;
-
// Special cases for a known aligned load to simplify the base pointer
// and insertion byte:
if (basePtr.getOpcode() == ISD::ADD
@@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
}
} else {
// Unaligned load: must be more pessimistic about addressing modes:
@@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getConstant(0, PtrVT));
}
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ // Load the memory to which to store.
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
SN->getSrcValue(), SN->getSrcValueOffset(),
SN->isVolatile(), SN->isNonTemporal(), 16);
@@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
@@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i8:
- case MVT::i16:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
@@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
@@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
@@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken form V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
@@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2Elt, MVT::i32));
+ DAG.getConstant(V2EltOffset, MVT::i32));
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
@@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
@@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
// use 0 when the lane to insert to is 'undef'
- int64_t Idx=0;
+ int64_t Offset=0;
if (IdxOp.getOpcode() != ISD::UNDEF) {
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Idx = (CN->getSExtValue());
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Idx, PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 69aa0887bd77..26d6b4f25ef1 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -54,148 +54,6 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
-bool
-SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- switch (MI.getOpcode()) {
- default:
- break;
- case SPU::ORIv4i32:
- case SPU::ORIr32:
- case SPU::ORHIv8i16:
- case SPU::ORHIr16:
- case SPU::ORHIi8i16:
- case SPU::ORBIv16i8:
- case SPU::ORBIr8:
- case SPU::ORIi16i32:
- case SPU::ORIi8i32:
- case SPU::AHIvec:
- case SPU::AHIr16:
- case SPU::AIv4i32:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::AIr32:
- assert(MI.getNumOperands() == 3 &&
- "wrong number of operands to AIr32");
- if (MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- (MI.getOperand(2).isImm() &&
- MI.getOperand(2).getImm() == 0)) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::LRr8:
- case SPU::LRr16:
- case SPU::LRr32:
- case SPU::LRf32:
- case SPU::LRr64:
- case SPU::LRf64:
- case SPU::LRr128:
- case SPU::LRv16i8:
- case SPU::LRv8i16:
- case SPU::LRv4i32:
- case SPU::LRv4f32:
- case SPU::LRv2i64:
- case SPU::LRv2f64:
- case SPU::ORv16i8_i8:
- case SPU::ORv8i16_i16:
- case SPU::ORv4i32_i32:
- case SPU::ORv2i64_i64:
- case SPU::ORv4f32_f32:
- case SPU::ORv2f64_f64:
- case SPU::ORi8_v16i8:
- case SPU::ORi16_v8i16:
- case SPU::ORi32_v4i32:
- case SPU::ORi64_v2i64:
- case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
-/*
- case SPU::ORi128_r64:
- case SPU::ORi128_f64:
- case SPU::ORi128_r32:
- case SPU::ORi128_f32:
- case SPU::ORi128_r16:
- case SPU::ORi128_r8:
-*/
- case SPU::ORi128_vec:
-/*
- case SPU::ORr64_i128:
- case SPU::ORf64_i128:
- case SPU::ORr32_i128:
- case SPU::ORf32_i128:
- case SPU::ORr16_i128:
- case SPU::ORr8_i128:
-*/
- case SPU::ORvec_i128:
-/*
- case SPU::ORr16_r32:
- case SPU::ORr8_r32:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORr32_r16:
- case SPU::ORr32_r8:
- case SPU::ORr16_r64:
- case SPU::ORr8_r64:
- case SPU::ORr64_r16:
- case SPU::ORr64_r8:
-*/
- case SPU::ORr64_r32:
- case SPU::ORr32_r64:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORf64_r64:
- case SPU::ORr64_f64: {
- assert(MI.getNumOperands() == 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid SPU OR<type>_<vec> or LR instruction!");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- break;
- }
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORr128:
- case SPU::ORf32:
- case SPU::ORf64:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid SPU OR(vec|r32|r64|gprc) instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- }
-
- return false;
-}
-
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index fbb173318148..191e55d0ca61 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,12 +32,6 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index a7fb14c26a76..ca0fe00e37f8 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadDFormVec<v4f32>;
def v2f64: LoadDFormVec<v2f64>;
- def v2i32: LoadDFormVec<v2i32>;
-
def r128: LoadDForm<GPRC>;
def r64: LoadDForm<R64C>;
def r32: LoadDForm<R32C>;
@@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadAFormVec<v4f32>;
def v2f64: LoadAFormVec<v2f64>;
- def v2i32: LoadAFormVec<v2i32>;
-
def r128: LoadAForm<GPRC>;
def r64: LoadAForm<R64C>;
def r32: LoadAForm<R32C>;
@@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadXFormVec<v4f32>;
def v2f64: LoadXFormVec<v2f64>;
- def v2i32: LoadXFormVec<v2i32>;
-
def r128: LoadXForm<GPRC>;
def r64: LoadXForm<R64C>;
def r32: LoadXForm<R32C>;
@@ -180,8 +174,6 @@ multiclass StoreDForms
def v4f32: StoreDFormVec<v4f32>;
def v2f64: StoreDFormVec<v2f64>;
- def v2i32: StoreDFormVec<v2i32>;
-
def r128: StoreDForm<GPRC>;
def r64: StoreDForm<R64C>;
def r32: StoreDForm<R32C>;
@@ -212,8 +204,6 @@ multiclass StoreAForms
def v4f32: StoreAFormVec<v4f32>;
def v2f64: StoreAFormVec<v2f64>;
- def v2i32: StoreAFormVec<v2i32>;
-
def r128: StoreAForm<GPRC>;
def r64: StoreAForm<R64C>;
def r32: StoreAForm<R32C>;
@@ -246,8 +236,6 @@ multiclass StoreXForms
def v4f32: StoreXFormVec<v4f32>;
def v2f64: StoreXFormVec<v2f64>;
- def v2i32: StoreXFormVec<v2i32>;
-
def r128: StoreXForm<GPRC>;
def r64: StoreXForm<R64C>;
def r32: StoreXForm<R32C>;
@@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>:
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
def r32: ARegInst<R32C>;
}
@@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -1448,6 +1436,9 @@ class ORCvtGPRCVec:
class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+class ORCvtVecVec:
+ ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
multiclass BitwiseOr
{
def v16i8: ORVecInst<v16i8>;
@@ -3894,6 +3885,79 @@ multiclass SFPSub
defm FS : SFPSub;
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Mulitply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
// Floating point reciprocal estimate
class FRESTInst<dag OOL, dag IOL>:
@@ -4019,72 +4083,6 @@ def FSCRRf32 :
// status and control register read
//--------------------------------------
-// Floating point multiply instructions
-//--------------------------------------
-
-def FMv4f32:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def FMf32 :
- RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-//--------------------------------------
// Floating Point Conversions
// Signed conversions:
def CSiFv4f32:
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 6216651e48a4..e1a0358abc46 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{
return (N->getZExtValue() <= 0xff);
}]>;
-// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i64ImmSExt10 : PatLeaf<(imm), [{
- return isI64IntS10Immediate(N);
-}]>;
-
// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
// extended field. Used by RI10Form instructions like 'ldq'.
def i32ImmSExt10 : PatLeaf<(imm), [{
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index f7cfa42f2a95..cf718917a561 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
MBB.erase(I);
}
-unsigned
+void
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value,
RegScavenger *RS) const
{
unsigned i = 0;
@@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
} else {
MO.ChangeToImmediate(Offset);
}
- return 0;
}
/// determineFrameLayout - Determine the size of the frame and maximum call
@@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
@@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// Mark effective beginning of when frame pointer is ready.
MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
@@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
dl = MBBI->getDebugLoc();
// Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL))
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
.addSym(MMI.getContext().CreateTempSymbol());
}
}
@@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
case SPU::LQDr32: return SPU::LQXr32;
case SPU::LQDr128: return SPU::LQXr128;
case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
case SPU::LQDv4f32: return SPU::LQXv4f32;
case SPU::STQDr32: return SPU::STQXr32;
case SPU::STQDr128: return SPU::STQXr128;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 7a6ae6d43c7e..aedb769cb4fc 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -63,9 +63,8 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
//! Convert frame indicies into machine operands
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
//! Determine the frame's layour
void determineFrameLayout(MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index bb88f2bf9a29..3e8f0979256a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128,
// The SPU's registers as vector registers:
def VECREG : RegisterClass<"SPU",
- [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
128,
[
/* volatile register */