Diffstat (limited to 'lib/Target/CellSPU')
 lib/Target/CellSPU/SPUCallingConv.td   |  24
 lib/Target/CellSPU/SPUISelDAGToDAG.cpp |  44
 lib/Target/CellSPU/SPUISelLowering.cpp | 105
 lib/Target/CellSPU/SPUInstrInfo.cpp    | 142
 lib/Target/CellSPU/SPUInstrInfo.h      |   6
 lib/Target/CellSPU/SPUInstrInfo.td     | 156
 lib/Target/CellSPU/SPUOperands.td      |   6
 lib/Target/CellSPU/SPURegisterInfo.cpp |  11
 lib/Target/CellSPU/SPURegisterInfo.h   |   5
 lib/Target/CellSPU/SPURegisterInfo.td  |   2
10 files changed, 144 insertions, 357 deletions
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index ec2f663908f6..04fa2ae866d6 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -1,4 +1,4 @@ -//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===// +//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A> // Return Value Calling Convention //===----------------------------------------------------------------------===// -// Return-value convention for Cell SPU: Everything can be passed back via $3: +// Return-value convention for Cell SPU: return value to be passed in reg 3-74 def RetCC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3]>>, - CCIfType<[i16], CCAssignToReg<[R3]>>, - CCIfType<[i32], CCAssignToReg<[R3]>>, - CCIfType<[i64], CCAssignToReg<[R3]>>, - CCIfType<[i128], CCAssignToReg<[R3]>>, - CCIfType<[f32, f64], CCAssignToReg<[R3]>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>, - CCIfType<[v2i32], CCAssignToReg<[R3]>> + CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>> ]>; @@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[ R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9b8c2ddd0635..2f1598441f5a 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -41,13 +41,6 @@ using namespace llvm; namespace { //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates bool - isI64IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates - bool isI32IntS10Immediate(ConstantSDNode *CN) { return isInt<10>(CN->getSExtValue()); @@ -67,14 +60,6 @@ namespace { return isInt<10>(CN->getSExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntS10Immediate(SDNode *N) - { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - return (CN != 0 && isI16IntS10Immediate(CN)); - } - //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values bool isI16IntU10Immediate(ConstantSDNode *CN) @@ -82,14 +67,6 @@ namespace { return isUInt<10>((short) CN->getZExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntU10Immediate(SDNode *N) - { - return (N->getOpcode() == ISD::Constant - && isI16IntU10Immediate(cast<ConstantSDNode>(N))); - } - //! ConstantSDNode predicate for signed 16-bit values /*! 
\arg CN The constant SelectionDAG node holding the value @@ -119,14 +96,6 @@ namespace { return false; } - //! SDNode predicate for signed 16-bit values. - bool - isIntS16Immediate(SDNode *N, short &Imm) - { - return (N->getOpcode() == ISD::Constant - && isIntS16Immediate(cast<ConstantSDNode>(N), Imm)); - } - //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. static bool isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) @@ -142,16 +111,6 @@ namespace { return false; } - bool - isHighLow(const SDValue &Op) - { - return (Op.getOpcode() == SPUISD::IndirectAddr - && ((Op.getOperand(0).getOpcode() == SPUISD::Hi - && Op.getOperand(1).getOpcode() == SPUISD::Lo) - || (Op.getOperand(0).getOpcode() == SPUISD::Lo - && Op.getOperand(1).getOpcode() == SPUISD::Hi))); - } - //===------------------------------------------------------------------===// //! EVT to "useful stuff" mapping structure: @@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Opc == ISD::Register ||Opc == ISD::CopyFromReg - ||Opc == ISD::UNDEF) { + ||Opc == ISD::UNDEF + ||Opc == ISD::Constant) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index ece19b9b89f6..46f31899be0c 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); - // "Odd size" vector classes that we're willing to support: - addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)i; @@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (alignment == 16) { ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer // and insertion byte: if (basePtr.getOpcode() == ISD::ADD @@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); } } else { // Unaligned load: must be more pessimistic about addressing modes: @@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.getConstant(0, PtrVT)); } - // Re-emit as a v16i8 vector load - alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + // Load the memory to which to store. 
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), SN->isVolatile(), SN->isNonTemporal(), 16); @@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } #endif - SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); - SDValue vectorizeOp = - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); + SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, + insertEltOffs); + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + theValue); result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, vectorizeOp, alignLoadVec, @@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Ins.empty()) return Chain; + // Now handle the return value(s) + SmallVector<CCValAssign, 16> RVLocs; + CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); + + // If the call has results, copy the values out of the ret val registers. - switch (Ins[0].VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected ret value!"); - case MVT::Other: break; - case MVT::i32: - if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, - MVT::i32, InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - Chain.getValue(2)).getValue(1); - InVals.push_back(Chain.getValue(0)); - } else { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - } - break; - case MVT::i8: - case MVT::i16: - case MVT::i64: - case MVT::i128: - case MVT::f32: - case MVT::f64: - case MVT::v2f64: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; - } + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + InVals.push_back(Val); + } return Chain; } @@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } - case MVT::v2i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); - } case MVT::v2i64: { return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } @@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element. + // to be monotonically increasing with one exception element, and the source + // slot of the element to move must be the same as the destination. 
EVT VecVT = V1.getValueType(); EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; - unsigned V2Elt = 0; + unsigned V2EltOffset = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; unsigned MaxElts = VecVT.getVectorNumElements(); @@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (monotonic) { if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } + // TODO: optimize for the monotonic case when several consecutive + // elements are taken form V2. Do we ever get such a case? + if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) + V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); + else + monotonic = false; + ++EltsFromV2; } else if (CurrElt != SrcElt) { monotonic = false; } @@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // R1 ($sp) is used here only as it is guaranteed to have last bits zero SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(V2Elt, MVT::i32)); + DAG.getConstant(V2EltOffset, MVT::i32)); SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); @@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { for (unsigned j = 0; j < BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); @@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { + if (!VecVT.isSimple() || !VecVT.isVector()) { report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue IdxOp = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); + EVT eltVT = ValOp.getValueType(); // use 0 when the lane to insert to is 'undef' - int64_t Idx=0; + int64_t Offset=0; if (IdxOp.getOpcode() != ISD::UNDEF) { ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - Idx = (CN->getSExtValue()); + Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(Idx, PtrVT)); - SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); + DAG.getConstant(Offset, PtrVT)); + // widen the mask when dealing with half vectors + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + 128/ VT.getVectorElementType().getSizeInBits()); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); SDValue result = DAG.getNode(SPUISD::SHUFB, dl, VT, diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 69aa0887bd77..26d6b4f25ef1 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -54,148 +54,6 @@ 
SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } -bool -SPUInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg, - unsigned& SrcSR, unsigned& DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - switch (MI.getOpcode()) { - default: - break; - case SPU::ORIv4i32: - case SPU::ORIr32: - case SPU::ORHIv8i16: - case SPU::ORHIr16: - case SPU::ORHIi8i16: - case SPU::ORBIv16i8: - case SPU::ORBIr8: - case SPU::ORIi16i32: - case SPU::ORIi8i32: - case SPU::AHIvec: - case SPU::AHIr16: - case SPU::AIv4i32: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isImm() && - "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!"); - if (MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::AIr32: - assert(MI.getNumOperands() == 3 && - "wrong number of operands to AIr32"); - if (MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - (MI.getOperand(2).isImm() && - MI.getOperand(2).getImm() == 0)) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::LRr8: - case SPU::LRr16: - case SPU::LRr32: - case SPU::LRf32: - case SPU::LRr64: - case SPU::LRf64: - case SPU::LRr128: - case SPU::LRv16i8: - case SPU::LRv8i16: - case SPU::LRv4i32: - case SPU::LRv4f32: - case SPU::LRv2i64: - case SPU::LRv2f64: - case SPU::ORv16i8_i8: - case SPU::ORv8i16_i16: - case SPU::ORv4i32_i32: - case SPU::ORv2i64_i64: - case SPU::ORv4f32_f32: - case SPU::ORv2f64_f64: - case SPU::ORi8_v16i8: - case SPU::ORi16_v8i16: - case SPU::ORi32_v4i32: - case SPU::ORi64_v2i64: - case SPU::ORf32_v4f32: - case SPU::ORf64_v2f64: -/* - case SPU::ORi128_r64: - case SPU::ORi128_f64: - case SPU::ORi128_r32: - case SPU::ORi128_f32: - case SPU::ORi128_r16: - case SPU::ORi128_r8: -*/ - case SPU::ORi128_vec: -/* - case SPU::ORr64_i128: - case SPU::ORf64_i128: - case SPU::ORr32_i128: - case SPU::ORf32_i128: - case SPU::ORr16_i128: - case SPU::ORr8_i128: -*/ - case SPU::ORvec_i128: -/* - case SPU::ORr16_r32: - case SPU::ORr8_r32: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORr32_r16: - case SPU::ORr32_r8: - case SPU::ORr16_r64: - case SPU::ORr8_r64: - case SPU::ORr64_r16: - case SPU::ORr64_r8: -*/ - case SPU::ORr64_r32: - case SPU::ORr32_r64: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORf64_r64: - case SPU::ORr64_f64: { - assert(MI.getNumOperands() == 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid SPU OR<type>_<vec> or LR instruction!"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - break; - } - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORr128: - case SPU::ORf32: - case SPU::ORf64: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid SPU OR(vec|r32|r64|gprc) instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - } - - return false; -} - unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h 
b/lib/Target/CellSPU/SPUInstrInfo.h index fbb173318148..191e55d0ca61 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,12 +32,6 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index a7fb14c26a76..ca0fe00e37f8 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadDFormVec<v4f32>; def v2f64: LoadDFormVec<v2f64>; - def v2i32: LoadDFormVec<v2i32>; - def r128: LoadDForm<GPRC>; def r64: LoadDForm<R64C>; def r32: LoadDForm<R32C>; @@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadAFormVec<v4f32>; def v2f64: LoadAFormVec<v2f64>; - def v2i32: LoadAFormVec<v2i32>; - def r128: LoadAForm<GPRC>; def r64: LoadAForm<R64C>; def r32: LoadAForm<R32C>; @@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadXFormVec<v4f32>; def v2f64: LoadXFormVec<v2f64>; - def v2i32: LoadXFormVec<v2i32>; - def r128: LoadXForm<GPRC>; def r64: LoadXForm<R64C>; def r32: LoadXForm<R32C>; @@ -180,8 +174,6 @@ multiclass StoreDForms def v4f32: StoreDFormVec<v4f32>; def v2f64: StoreDFormVec<v2f64>; - def v2i32: StoreDFormVec<v2i32>; - def r128: StoreDForm<GPRC>; def r64: StoreDForm<R64C>; def r32: StoreDForm<R32C>; @@ -212,8 +204,6 @@ multiclass StoreAForms def v4f32: StoreAFormVec<v4f32>; def v2f64: StoreAFormVec<v2f64>; - def v2i32: StoreAFormVec<v2i32>; - def r128: StoreAForm<GPRC>; def r64: StoreAForm<R64C>; def r32: StoreAForm<R32C>; @@ -246,8 +236,6 @@ multiclass StoreXForms def v4f32: StoreXFormVec<v4f32>; def v2f64: StoreXFormVec<v2f64>; - def v2i32: StoreXFormVec<v2i32>; - def r128: StoreXForm<GPRC>; def r64: StoreXForm<R64C>; def r32: StoreXForm<R32C>; @@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>: multiclass AddInstruction { def v4i32: AVecInst<v4i32>; def v16i8: AVecInst<v16i8>; - def r32: ARegInst<R32C>; } @@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), "sf\t$rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; + def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; @@ -1448,6 +1436,9 @@ class ORCvtGPRCVec: class ORCvtVecGPRC: ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; +class ORCvtVecVec: + ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; + multiclass BitwiseOr { def v16i8: ORVecInst<v16i8>; @@ -3894,6 +3885,79 @@ multiclass SFPSub defm FS : SFPSub; +class FMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01100011010, OOL, IOL, + "fm\t$rT, $rA, $rB", SPrecFP, + pattern>; + +class FMVecInst<ValueType type>: + FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (type VECREG:$rT), + (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; + +multiclass SFPMul +{ + def v4f32: FMVecInst<v4f32>; + def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; +} + +defm FM : SFPMul; 
+ +// Floating point multiply and add +// e.g. d = c + (a * b) +def FMAv4f32: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fadd (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; + +def FMAf32: + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +// FP multiply and subtract +// Subtracts value in rC from product +// res = a * b - c +def FMSv4f32 : + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (v4f32 VECREG:$rC)))]>; + +def FMSf32 : + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, + (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; + +// Floating Negative Mulitply and Subtract +// Subtracts product from value in rC +// res = fneg(fms a b c) +// = - (a * b - c) +// = c - a * b +// NOTE: subtraction order +// fsub a b = a - b +// fs a b = b - a? +def FNMSf32 : + RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +def FNMSv4f32 : + RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB))))]>; + + + + // Floating point reciprocal estimate class FRESTInst<dag OOL, dag IOL>: @@ -4019,72 +4083,6 @@ def FSCRRf32 : // status and control register read //-------------------------------------- -// Floating point multiply instructions -//-------------------------------------- - -def FMv4f32: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def FMf32 : - RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; - -// Floating point multiply and add -// e.g. 
d = c + (a * b) -def FMAv4f32: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fadd (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; - -def FMAf32: - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -// FP multiply and subtract -// Subtracts value in rC from product -// res = a * b - c -def FMSv4f32 : - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (v4f32 VECREG:$rC)))]>; - -def FMSf32 : - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, - (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; - -// Floating Negative Mulitply and Subtract -// Subtracts product from value in rC -// res = fneg(fms a b c) -// = - (a * b - c) -// = c - a * b -// NOTE: subtraction order -// fsub a b = a - b -// fs a b = b - a? -def FNMSf32 : - RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -def FNMSv4f32 : - RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB))))]>; - -//-------------------------------------- // Floating Point Conversions // Signed conversions: def CSiFv4f32: diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 6216651e48a4..e1a0358abc46 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{ return (N->getZExtValue() <= 0xff); }]>; -// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i64ImmSExt10 : PatLeaf<(imm), [{ - return isI64IntS10Immediate(N); -}]>; - // i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign // extended field. Used by RI10Form instructions like 'ldq'. def i32ImmSExt10 : PatLeaf<(imm), [{ diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index f7cfa42f2a95..cf718917a561 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, MBB.erase(I); } -unsigned +void SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const { unsigned i = 0; @@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } else { MO.ChangeToImmediate(Offset); } - return 0; } /// determineFrameLayout - Determine the size of the frame and maximum call @@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); } // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) @@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // Mark effective beginning of when frame pointer is ready. MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(SPU::R1); MachineLocation FPSrc(MachineLocation::VirtualFP); @@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const dl = MBBI->getDebugLoc(); // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)) + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) .addSym(MMI.getContext().CreateTempSymbol()); } } @@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const case SPU::LQDr32: return SPU::LQXr32; case SPU::LQDr128: return SPU::LQXr128; case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4i32: return SPU::LQXv4i32; case SPU::LQDv4f32: return SPU::LQXv4f32; case SPU::STQDr32: return SPU::STQXr32; case SPU::STQDr128: return SPU::STQXr128; diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 7a6ae6d43c7e..aedb769cb4fc 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -63,9 +63,8 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; //! Convert frame indicies into machine operands - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + RegScavenger *RS = NULL) const; //! Determine the frame's layour void determineFrameLayout(MachineFunction &MF) const; diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index bb88f2bf9a29..3e8f0979256a 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128, // The SPU's registers as vector registers: def VECREG : RegisterClass<"SPU", - [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64], + [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, [ /* volatile register */ |
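
The LowerSTORE hunk keeps the SPU's usual read-modify-write scheme for scalar stores: load the 16-byte quadword containing the target address, build a shuffle mask from the insertion byte offset, merge the scalar in with SHUFB, and store the whole vector back; the patch reloads at the store's natural vector type instead of forcing v16i8. Below is a minimal standalone C++ sketch of that read-modify-write idea only -- the names (storeScalarRMW, lsBase) are made up for illustration, plain byte copies stand in for SHUFB, and host byte order is used (the real SPU is big-endian).

#include <cstdint>
#include <cstdio>
#include <cstring>

// Toy model: store a 4-byte scalar into 16-byte-aligned local store by
// reading the containing quadword, splicing the scalar in at its byte
// offset, and writing the quadword back (SHUFB does the splice on HW).
void storeScalarRMW(uint8_t *lsBase, uint32_t addr, uint32_t value) {
  uint8_t *quad = lsBase + (addr & ~15u);   // containing 16-byte line
  uint32_t offset = addr & 15u;             // insertion byte offset
  uint8_t line[16];
  std::memcpy(line, quad, 16);              // "aligned vector load"
  std::memcpy(line + offset, &value, 4);    // merge (stands in for SHUFB)
  std::memcpy(quad, line, 16);              // store the vector back
}

int main() {
  uint8_t ls[32] = {0};
  storeScalarRMW(ls, 20, 0xdeadbeefu);      // lands in the second quadword
  std::printf("%02x %02x %02x %02x\n", ls[20], ls[21], ls[22], ls[23]);
  return 0;
}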
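
Both the LowerVECTOR_SHUFFLE and LowerINSERT_VECTOR_ELT hunks switch from passing a raw element index to passing a byte offset into the quadword ((slot - base) * element size in bytes) when forming the $sp-relative address fed to SPUISD::SHUFFLE_MASK, which is what lets these paths handle element types other than i32. A small sketch of just that offset arithmetic; the example values are chosen here for illustration and are not taken from the patch.

#include <cassert>
#include <cstdio>

// Byte offset of vector element `slot` when elements are eltBits wide,
// mirroring Offset = Idx * eltVT.getSizeInBits() / 8 in the lowering code.
unsigned elementByteOffset(unsigned slot, unsigned eltBits) {
  return slot * (eltBits / 8);
}

int main() {
  assert(elementByteOffset(2, 32) == 8);   // v4i32, element 2
  assert(elementByteOffset(3, 16) == 6);   // v8i16, element 3
  assert(elementByteOffset(1, 64) == 8);   // v2i64, element 1
  std::printf("offsets check out\n");
  return 0;
}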
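
The fm/fma/fms/fnms definitions that move in this patch encode the semantics spelled out in the TableGen comments: fma computes c + a*b, fms computes a*b - c, and fnms computes c - a*b (the negated fms). The tiny reference model below only pins down the operand order implied by those comments; it is not how the instructions are implemented, and the helper names are invented here.

#include <cassert>

// Reference semantics for the single-precision multiply-add family,
// following the comments in SPUInstrInfo.td.
float fm_  (float a, float b)          { return a * b; }
float fma_ (float a, float b, float c) { return c + a * b; }       // fma
float fms_ (float a, float b, float c) { return a * b - c; }       // fms
float fnms_(float a, float b, float c) { return c - a * b; }       // fnms == -(a*b - c)

int main() {
  float a = 2.0f, b = 3.0f, c = 10.0f;
  assert(fma_ (a, b, c) == 16.0f);
  assert(fms_ (a, b, c) == -4.0f);
  assert(fnms_(a, b, c) ==  4.0f);
  assert(fnms_(a, b, c) == -fms_(a, b, c));
  return 0;
}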