Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--   lib/Target/PowerPC/PPCISelLowering.cpp   127
1 file changed, 122 insertions, 5 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 3d06de804200..6dd73174565a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -665,9 +665,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
     }
+
     if (Subtarget.hasP9Vector()) {
-      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
-      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
     }
   }
 
@@ -7846,6 +7847,17 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
 }
 
+SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+         "Should only be called for ISD::INSERT_VECTOR_ELT");
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+  // We have legal lowering for constant indices but not for variable ones.
+  if (C)
+    return Op;
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -8248,6 +8260,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
 
   // For counter-based loop handling.
@@ -8372,7 +8385,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                     unsigned AtomicSize,
-                                    unsigned BinOpcode) const {
+                                    unsigned BinOpcode,
+                                    unsigned CmpOpcode,
+                                    unsigned CmpPred) const {
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
 
@@ -8412,8 +8427,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB =
+    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(It, loopMBB);
+  if (CmpOpcode)
+    F->insert(It, loop2MBB);
   F->insert(It, exitMBB);
   exitMBB->splice(exitMBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -8435,11 +8454,40 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
   //   st[wd]cx. r0, ptr
   //   bne- loopMBB
   //   fallthrough --> exitMBB
+
+  //  For max/min...
+  //  loopMBB:
+  //   l[wd]arx dest, ptr
+  //   cmpl?[wd] incr, dest
+  //   bgt exitMBB
+  //  loop2MBB:
+  //   st[wd]cx. dest, ptr
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
     .addReg(ptrA).addReg(ptrB);
   if (BinOpcode)
     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+  if (CmpOpcode) {
+    // Signed comparisons of byte or halfword values must be sign-extended.
+    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
+      unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
+              ExtReg).addReg(dest);
+      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+        .addReg(incr).addReg(ExtReg);
+    } else
+      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+        .addReg(incr).addReg(dest);
+
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+    BB->addSuccessor(loop2MBB);
+    BB->addSuccessor(exitMBB);
+    BB = loop2MBB;
+  }
   BuildMI(BB, dl, TII->get(StoreMnemonic))
     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
   BuildMI(BB, dl, TII->get(PPC::BCC))
@@ -8457,10 +8505,13 @@ MachineBasicBlock *
 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                             MachineBasicBlock *BB,
                                             bool is8bit, // operation
-                                            unsigned BinOpcode) const {
+                                            unsigned BinOpcode,
+                                            unsigned CmpOpcode,
+                                            unsigned CmpPred) const {
   // If we support part-word atomic mnemonics, just use them
   if (Subtarget.hasPartwordAtomics())
-    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
+                            CmpOpcode, CmpPred);
 
   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -8482,8 +8533,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
   DebugLoc dl = MI.getDebugLoc();
 
   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB =
+    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(It, loopMBB);
+  if (CmpOpcode)
+    F->insert(It, loop2MBB);
   F->insert(It, exitMBB);
   exitMBB->splice(exitMBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -8568,6 +8623,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
     .addReg(TmpDestReg).addReg(MaskReg);
   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
     .addReg(TmpReg).addReg(MaskReg);
+  if (CmpOpcode) {
+    // For unsigned comparisons, we can directly compare the shifted values.
+    // For signed comparisons we shift and sign extend.
+    unsigned SReg = RegInfo.createVirtualRegister(RC);
+    BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
+      .addReg(TmpDestReg).addReg(MaskReg);
+    unsigned ValueReg = SReg;
+    unsigned CmpReg = Incr2Reg;
+    if (CmpOpcode == PPC::CMPW) {
+      ValueReg = RegInfo.createVirtualRegister(RC);
+      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
+        .addReg(SReg).addReg(ShiftReg);
+      unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
+      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
+        .addReg(ValueReg);
+      ValueReg = ValueSReg;
+      CmpReg = incr;
+    }
+    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
+      .addReg(CmpReg).addReg(ValueReg);
+    BuildMI(BB, dl, TII->get(PPC::BCC))
+      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
+    BB->addSuccessor(loop2MBB);
+    BB->addSuccessor(exitMBB);
+    BB = loop2MBB;
+  }
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
   BuildMI(BB, dl, TII->get(PPC::STWCX))
@@ -9074,6 +9155,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
 
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
+
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
+    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
+    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
+    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
+  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
+    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
+
   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
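
Note: the new CmpOpcode/CmpPred paths in EmitAtomicBinary and EmitPartwordAtomicBinary above lower the ATOMIC_LOAD_MIN/MAX/UMIN/UMAX pseudos, i.e. IR-level atomicrmw min/max/umin/umax, as a single load-reserve / compare / store-conditional loop. As a minimal sketch of the read-modify-write semantics being lowered, in portable C++ (not part of the commit; atomic_fetch_min is a made-up helper name, and a compare-exchange loop like this is only an illustration of what the operation computes, not code guaranteed to reach the backend as an atomicrmw min):

#include <atomic>
#include <algorithm>

// Hypothetical helper: atomically stores min(previous, arg) and returns the
// previous value, like the 'dest' register in the loopMBB comment block.
int atomic_fetch_min(std::atomic<int> &obj, int arg) {
  int old = obj.load(std::memory_order_relaxed);
  // Retry until the store succeeds, mirroring the bne- back to loopMBB
  // in the lowered sequence.
  while (!obj.compare_exchange_weak(old, std::min(old, arg),
                                    std::memory_order_seq_cst,
                                    std::memory_order_relaxed)) {
  }
  return old;
}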