diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
commit | 77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (patch) | |
tree | 5c0eb39553003b9c75a901af6bc4ddabd6f2f28c /llvm/lib/Target/PowerPC | |
parent | f65dcba83ce5035ab88a85fe17628b447eb56e1b (diff) | |
download | src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.tar.gz src-77fc4c146f0870ffb09c1afb823ccbe742c5e6ff.zip |
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCBack2BackFusion.def | 1042 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 56 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCMacroFusion.def | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 103 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 16 |
16 files changed, 1255 insertions, 55 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 9e181d4052d6..ded922329ebf 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1576,6 +1576,16 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, std::swap(Operands[2], Operands[1]); } + // Handle base mnemonic for atomic loads where the EH bit is zero. + if (Name == "lqarx" || Name == "ldarx" || Name == "lwarx" || + Name == "lharx" || Name == "lbarx") { + if (Operands.size() != 5) + return false; + PPCOperand &EHOp = (PPCOperand &)*Operands[4]; + if (EHOp.isU1Imm() && EHOp.getImm() == 0) + Operands.pop_back(); + } + return false; } @@ -1745,7 +1755,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, } PPCOperand &Op = static_cast<PPCOperand &>(AsmOp); - if (Op.isImm() && Op.getImm() == ImmVal) + if (Op.isU3Imm() && Op.getImm() == ImmVal) return Match_Success; return Match_InvalidOperand; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 22b948a83c34..d6e02d0d0862 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" @@ -368,6 +369,31 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, return new PPCInstPrinter(MAI, MII, MRI, T); } +namespace { + +class PPCMCInstrAnalysis : public MCInstrAnalysis { +public: + explicit PPCMCInstrAnalysis(const MCInstrInfo *Info) + : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); + if (NumOps == 0 || + Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType != + MCOI::OPERAND_PCREL) + return false; + Target = Addr + Inst.getOperand(NumOps - 1).getImm() * Size; + return true; + } +}; + +} // end anonymous namespace + +static MCInstrAnalysis *createPPCMCInstrAnalysis(const MCInstrInfo *Info) { + return new PPCMCInstrAnalysis(Info); +} + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() { for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(), &getThePPC64Target(), &getThePPC64LETarget()}) { @@ -383,6 +409,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() { // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo); + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createPPCMCInstrAnalysis); + // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter); diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 422bd11dca52..bbd5f5fd1941 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -219,6 +219,10 @@ def FeatureZeroMoveFusion: SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true", "Target supports move to SPR with branch fusion", [FeatureFusion]>; +def FeatureBack2BackFusion: + SubtargetFeature<"fuse-back2back", "HasBack2BackFusion", "true", + "Target supports general back to back fusion", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 16e3b2b85c2e..f26c15667a0b 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -347,7 +347,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // At the moment, all inline asm memory operands are a single register. // In any case, the output of this routine should always be just one // assembler operand. - bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) { diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def new file mode 100644 index 000000000000..38ed5f2e78e3 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def @@ -0,0 +1,1042 @@ +// Automatically generated file, do not edit! +// +// This file defines instruction list for general back2back fusion. +//===----------------------------------------------------------------------===// +FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, + FUSION_OP_SET(ADD4, + ADD4O, + ADD4TLS, + ADD4_rec, + ADD8, + ADD8O, + ADD8TLS, + ADD8TLS_, + ADD8_rec, + ADDE, + ADDE8, + ADDE8O, + ADDEO, + ADDEX, + ADDEX8, + ADDI, + ADDI8, + ADDIC, + ADDIC8, + ADDIS, + ADDIS8, + ADDISdtprelHA32, + ADDIStocHA, + ADDIStocHA8, + ADDIdtprelL32, + ADDItlsldLADDR32, + ADDItocL, + ADDME, + ADDME8, + ADDME8O, + ADDMEO, + ADDZE, + ADDZE8, + ADDZE8O, + ADDZEO, + AND, + AND8, + AND8_rec, + ANDC, + ANDC8, + ANDC8_rec, + ANDC_rec, + ANDI8_rec, + ANDIS8_rec, + ANDIS_rec, + ANDI_rec, + AND_rec, + CMPB, + CMPB8, + CNTLZD, + CNTLZD_rec, + CNTLZW, + CNTLZW8, + CNTLZW8_rec, + CNTLZW_rec, + CNTTZD, + CNTTZD_rec, + CNTTZW, + CNTTZW8, + CNTTZW8_rec, + CNTTZW_rec, + EQV, + EQV8, + EQV8_rec, + EQV_rec, + EXTSB, + EXTSB8, + EXTSB8_32_64, + EXTSB8_rec, + EXTSB_rec, + EXTSH, + EXTSH8, + EXTSH8_32_64, + EXTSH8_rec, + EXTSH_rec, + EXTSW, + EXTSWSLI, + EXTSWSLI_32_64, + EXTSWSLI_32_64_rec, + EXTSWSLI_rec, + EXTSW_32, + EXTSW_32_64, + EXTSW_32_64_rec, + EXTSW_rec, + FABSD, + FABSS, + FCPSGND, + FCPSGNS, + FMR, + FNABSD, + FNABSS, + FNEGD, + FNEGS, + ISEL, + ISEL8, + LI, + LI8, + LIS, + LIS8, + MFCTR, + MFCTR8, + MFLR, + MFLR8, + MFOCRF, + MFOCRF8, + MFVRD, + MFVRWZ, + MFVSRD, + MFVSRWZ, + MTVRD, + MTVRWA, + MTVRWZ, + MTVSRBM, + MTVSRBMI, + MTVSRD, + MTVSRDM, + MTVSRHM, + MTVSRQM, + MTVSRWA, + MTVSRWM, + MTVSRWZ, + NAND, + NAND8, + NAND8_rec, + NAND_rec, + NEG, + NEG8, + NEG8O, + NEG8_rec, + NEGO, + NEG_rec, + NOP, + NOP_GT_PWR6, + NOP_GT_PWR7, + NOR, + NOR8, + NOR8_rec, + NOR_rec, + OR, + OR8, + OR8_rec, + ORC, + ORC8, + ORC8_rec, + ORC_rec, + ORI, + ORI8, + ORIS, + ORIS8, + OR_rec, + POPCNTB, + POPCNTB8, + POPCNTD, + POPCNTW, + RLDCL, + RLDCL_rec, + RLDCR, + RLDCR_rec, + RLDIC, + RLDICL, + RLDICL_32, + RLDICL_32_64, + RLDICL_32_rec, + RLDICL_rec, + RLDICR, + RLDICR_32, + RLDICR_rec, + RLDIC_rec, + RLDIMI, + RLDIMI_rec, + RLWIMI, + RLWIMI8, + RLWIMI8_rec, + RLWIMI_rec, + RLWINM, + RLWINM8, + RLWINM8_rec, + RLWINM_rec, + RLWNM, + RLWNM8, + RLWNM8_rec, + RLWNM_rec, + SETB, + SETB8, + SETBC, + SETBC8, + SETBCR, + SETBCR8, + SETNBC, + SETNBC8, + SETNBCR, + SETNBCR8, + SLD, + SLD_rec, + SLW, + SLW8, + SLW8_rec, + SLW_rec, + SRAD, + SRADI, + SRADI_32, + SRAW, + SRAWI, + SRD, + SRD_rec, + SRW, + SRW8, + SRW8_rec, + SRW_rec, + SUBF, + SUBF8, + SUBF8O, + SUBF8_rec, + SUBFE, + SUBFE8, + SUBFE8O, + SUBFEO, + SUBFIC, + SUBFIC8, + SUBFME, + SUBFME8, + SUBFME8O, + SUBFMEO, + SUBFO, + SUBFZE, + SUBFZE8, + SUBFZE8O, + SUBFZEO, + SUBF_rec, + VABSDUB, + VABSDUH, + VABSDUW, + VADDCUW, + VADDSBS, + VADDSHS, + VADDSWS, + VADDUBM, + VADDUBS, + VADDUDM, + VADDUHM, + VADDUHS, + VADDUWM, + VADDUWS, + VAND, + VANDC, + VAVGSB, + VAVGSH, + VAVGSW, + VAVGUB, + VAVGUH, + VAVGUW, + VCLZB, + VCLZD, + VCLZH, + VCLZW, + VCMPBFP, + VCMPBFP_rec, + VCMPEQFP, + VCMPEQFP_rec, + VCMPEQUB, + VCMPEQUB_rec, + VCMPEQUD, + VCMPEQUD_rec, + VCMPEQUH, + VCMPEQUH_rec, + VCMPEQUQ, + VCMPEQUQ_rec, + VCMPEQUW, + VCMPEQUW_rec, + VCMPGEFP, + VCMPGEFP_rec, + VCMPGTFP, + VCMPGTFP_rec, + VCMPGTSB, + VCMPGTSB_rec, + VCMPGTSD, + VCMPGTSD_rec, + VCMPGTSH, + VCMPGTSH_rec, + VCMPGTSQ, + VCMPGTSQ_rec, + VCMPGTSW, + VCMPGTSW_rec, + VCMPGTUB, + VCMPGTUB_rec, + VCMPGTUD, + VCMPGTUD_rec, + VCMPGTUH, + VCMPGTUH_rec, + VCMPGTUQ, + VCMPGTUQ_rec, + VCMPGTUW, + VCMPGTUW_rec, + VCMPNEB, + VCMPNEB_rec, + VCMPNEH, + VCMPNEH_rec, + VCMPNEW, + VCMPNEW_rec, + VCMPNEZB, + VCMPNEZB_rec, + VCMPNEZH, + VCMPNEZH_rec, + VCMPNEZW, + VCMPNEZW_rec, + VCNTMBB, + VCNTMBD, + VCNTMBH, + VCNTMBW, + VCTZB, + VCTZD, + VCTZH, + VCTZW, + VEQV, + VEXPANDBM, + VEXPANDDM, + VEXPANDHM, + VEXPANDQM, + VEXPANDWM, + VEXTRACTBM, + VEXTRACTDM, + VEXTRACTHM, + VEXTRACTQM, + VEXTRACTWM, + VEXTSB2D, + VEXTSB2Ds, + VEXTSB2W, + VEXTSB2Ws, + VEXTSD2Q, + VEXTSH2D, + VEXTSH2Ds, + VEXTSH2W, + VEXTSH2Ws, + VEXTSW2D, + VEXTSW2Ds, + VMAXFP, + VMAXSB, + VMAXSD, + VMAXSH, + VMAXSW, + VMAXUB, + VMAXUD, + VMAXUH, + VMAXUW, + VMINFP, + VMINSB, + VMINSD, + VMINSH, + VMINSW, + VMINUB, + VMINUD, + VMINUH, + VMINUW, + VMRGEW, + VMRGOW, + VNAND, + VNEGD, + VNEGW, + VNOR, + VOR, + VORC, + VPOPCNTB, + VPOPCNTD, + VPOPCNTH, + VPOPCNTW, + VPRTYBD, + VPRTYBW, + VRLB, + VRLD, + VRLDMI, + VRLDNM, + VRLH, + VRLW, + VRLWMI, + VRLWNM, + VSEL, + VSHASIGMAD, + VSHASIGMAW, + VSLB, + VSLD, + VSLH, + VSLW, + VSRAB, + VSRAD, + VSRAH, + VSRAW, + VSRB, + VSRD, + VSRH, + VSRW, + VSUBCUW, + VSUBSBS, + VSUBSHS, + VSUBSWS, + VSUBUBM, + VSUBUBS, + VSUBUDM, + VSUBUHM, + VSUBUHS, + VSUBUWM, + VSUBUWS, + VXOR, + V_SET0, + V_SET0B, + V_SET0H, + XOR, + XOR8, + XOR8_rec, + XORI, + XORI8, + XORIS, + XORIS8, + XOR_rec, + XSABSDP, + XSABSQP, + XSCMPEQDP, + XSCMPGEDP, + XSCMPGTDP, + XSCPSGNDP, + XSCPSGNQP, + XSCVHPDP, + XSCVSPDPN, + XSIEXPDP, + XSIEXPQP, + XSMAXCDP, + XSMAXDP, + XSMAXJDP, + XSMINCDP, + XSMINDP, + XSMINJDP, + XSNABSDP, + XSNABSQP, + XSNEGDP, + XSNEGQP, + XSXEXPDP, + XSXEXPQP, + XSXSIGDP, + XVABSDP, + XVABSSP, + XVCMPEQDP, + XVCMPEQDP_rec, + XVCMPEQSP, + XVCMPEQSP_rec, + XVCMPGEDP, + XVCMPGEDP_rec, + XVCMPGESP, + XVCMPGESP_rec, + XVCMPGTDP, + XVCMPGTDP_rec, + XVCMPGTSP, + XVCMPGTSP_rec, + XVCPSGNDP, + XVCPSGNSP, + XVCVHPSP, + XVIEXPDP, + XVIEXPSP, + XVMAXDP, + XVMAXSP, + XVMINDP, + XVMINSP, + XVNABSDP, + XVNABSSP, + XVNEGDP, + XVNEGSP, + XVTSTDCDP, + XVTSTDCSP, + XVXEXPDP, + XVXEXPSP, + XVXSIGDP, + XVXSIGSP, + XXLAND, + XXLANDC, + XXLEQV, + XXLEQVOnes, + XXLNAND, + XXLNOR, + XXLOR, + XXLORC, + XXLORf, + XXLXOR, + XXLXORdpz, + XXLXORspz, + XXLXORz, + XXSEL), + FUSION_OP_SET(ADD4, + ADD4O, + ADD4TLS, + ADD4_rec, + ADD8, + ADD8O, + ADD8TLS, + ADD8TLS_, + ADD8_rec, + ADDE, + ADDE8, + ADDE8O, + ADDEO, + ADDEX, + ADDEX8, + ADDI, + ADDI8, + ADDIC, + ADDIC8, + ADDIS, + ADDIS8, + ADDISdtprelHA32, + ADDIStocHA, + ADDIStocHA8, + ADDIdtprelL32, + ADDItlsldLADDR32, + ADDItocL, + ADDME, + ADDME8, + ADDME8O, + ADDMEO, + ADDZE, + ADDZE8, + ADDZE8O, + ADDZEO, + AND, + AND8, + AND8_rec, + ANDC, + ANDC8, + ANDC8_rec, + ANDC_rec, + ANDI8_rec, + ANDIS8_rec, + ANDIS_rec, + ANDI_rec, + AND_rec, + CMPB, + CMPB8, + CMPD, + CMPDI, + CMPEQB, + CMPLD, + CMPLDI, + CMPLW, + CMPLWI, + CMPRB, + CMPRB8, + CMPW, + CMPWI, + CNTLZD, + CNTLZD_rec, + CNTLZW, + CNTLZW8, + CNTLZW8_rec, + CNTLZW_rec, + CNTTZD, + CNTTZD_rec, + CNTTZW, + CNTTZW8, + CNTTZW8_rec, + CNTTZW_rec, + CR6SET, + CR6UNSET, + CRAND, + CRANDC, + CREQV, + CRNAND, + CRNOR, + CROR, + CRORC, + CRSET, + CRUNSET, + CRXOR, + DSS, + DSSALL, + DST, + DST64, + DSTST, + DSTST64, + DSTSTT, + DSTSTT64, + DSTT, + DSTT64, + EQV, + EQV8, + EQV8_rec, + EQV_rec, + EXTSB, + EXTSB8, + EXTSB8_32_64, + EXTSB8_rec, + EXTSB_rec, + EXTSH, + EXTSH8, + EXTSH8_32_64, + EXTSH8_rec, + EXTSH_rec, + EXTSW, + EXTSWSLI, + EXTSWSLI_32_64, + EXTSWSLI_32_64_rec, + EXTSWSLI_rec, + EXTSW_32, + EXTSW_32_64, + EXTSW_32_64_rec, + EXTSW_rec, + FABSD, + FABSS, + FCMPOD, + FCMPOS, + FCMPUD, + FCMPUS, + FCPSGND, + FCPSGNS, + FMR, + FNABSD, + FNABSS, + FNEGD, + FNEGS, + FTDIV, + FTSQRT, + ISEL, + ISEL8, + LI, + LI8, + LIS, + LIS8, + MCRF, + MCRXRX, + MFCTR, + MFCTR8, + MFLR, + MFLR8, + MFOCRF, + MFOCRF8, + MFVRD, + MFVRWZ, + MFVSRD, + MFVSRWZ, + MTCTR, + MTCTR8, + MTCTR8loop, + MTCTRloop, + MTLR, + MTLR8, + MTOCRF, + MTOCRF8, + MTVRD, + MTVRWA, + MTVRWZ, + MTVSRBM, + MTVSRBMI, + MTVSRD, + MTVSRDM, + MTVSRHM, + MTVSRQM, + MTVSRWA, + MTVSRWM, + MTVSRWZ, + NAND, + NAND8, + NAND8_rec, + NAND_rec, + NEG, + NEG8, + NEG8O, + NEG8_rec, + NEGO, + NEG_rec, + NOP, + NOP_GT_PWR6, + NOP_GT_PWR7, + NOR, + NOR8, + NOR8_rec, + NOR_rec, + OR, + OR8, + OR8_rec, + ORC, + ORC8, + ORC8_rec, + ORC_rec, + ORI, + ORI8, + ORIS, + ORIS8, + OR_rec, + POPCNTB, + POPCNTB8, + POPCNTD, + POPCNTW, + RLDCL, + RLDCL_rec, + RLDCR, + RLDCR_rec, + RLDIC, + RLDICL, + RLDICL_32, + RLDICL_32_64, + RLDICL_32_rec, + RLDICL_rec, + RLDICR, + RLDICR_32, + RLDICR_rec, + RLDIC_rec, + RLDIMI, + RLDIMI_rec, + RLWIMI, + RLWIMI8, + RLWIMI8_rec, + RLWIMI_rec, + RLWINM, + RLWINM8, + RLWINM8_rec, + RLWINM_rec, + RLWNM, + RLWNM8, + RLWNM8_rec, + RLWNM_rec, + SETB, + SETB8, + SETBC, + SETBC8, + SETBCR, + SETBCR8, + SETNBC, + SETNBC8, + SETNBCR, + SETNBCR8, + SLD, + SLD_rec, + SLW, + SLW8, + SLW8_rec, + SLW_rec, + SRAD, + SRADI, + SRADI_32, + SRAW, + SRAWI, + SRD, + SRD_rec, + SRW, + SRW8, + SRW8_rec, + SRW_rec, + SUBF, + SUBF8, + SUBF8O, + SUBF8_rec, + SUBFE, + SUBFE8, + SUBFE8O, + SUBFEO, + SUBFIC, + SUBFIC8, + SUBFME, + SUBFME8, + SUBFME8O, + SUBFMEO, + SUBFO, + SUBFZE, + SUBFZE8, + SUBFZE8O, + SUBFZEO, + SUBF_rec, + TD, + TDI, + TRAP, + TW, + TWI, + VABSDUB, + VABSDUH, + VABSDUW, + VADDCUW, + VADDSBS, + VADDSHS, + VADDSWS, + VADDUBM, + VADDUBS, + VADDUDM, + VADDUHM, + VADDUHS, + VADDUWM, + VADDUWS, + VAND, + VANDC, + VAVGSB, + VAVGSH, + VAVGSW, + VAVGUB, + VAVGUH, + VAVGUW, + VCLZB, + VCLZD, + VCLZH, + VCLZW, + VCMPBFP, + VCMPBFP_rec, + VCMPEQFP, + VCMPEQFP_rec, + VCMPEQUB, + VCMPEQUB_rec, + VCMPEQUD, + VCMPEQUD_rec, + VCMPEQUH, + VCMPEQUH_rec, + VCMPEQUQ, + VCMPEQUQ_rec, + VCMPEQUW, + VCMPEQUW_rec, + VCMPGEFP, + VCMPGEFP_rec, + VCMPGTFP, + VCMPGTFP_rec, + VCMPGTSB, + VCMPGTSB_rec, + VCMPGTSD, + VCMPGTSD_rec, + VCMPGTSH, + VCMPGTSH_rec, + VCMPGTSQ, + VCMPGTSQ_rec, + VCMPGTSW, + VCMPGTSW_rec, + VCMPGTUB, + VCMPGTUB_rec, + VCMPGTUD, + VCMPGTUD_rec, + VCMPGTUH, + VCMPGTUH_rec, + VCMPGTUQ, + VCMPGTUQ_rec, + VCMPGTUW, + VCMPGTUW_rec, + VCMPNEB, + VCMPNEB_rec, + VCMPNEH, + VCMPNEH_rec, + VCMPNEW, + VCMPNEW_rec, + VCMPNEZB, + VCMPNEZB_rec, + VCMPNEZH, + VCMPNEZH_rec, + VCMPNEZW, + VCMPNEZW_rec, + VCMPSQ, + VCMPUQ, + VCNTMBB, + VCNTMBD, + VCNTMBH, + VCNTMBW, + VCTZB, + VCTZD, + VCTZH, + VCTZW, + VEQV, + VEXPANDBM, + VEXPANDDM, + VEXPANDHM, + VEXPANDQM, + VEXPANDWM, + VEXTRACTBM, + VEXTRACTDM, + VEXTRACTHM, + VEXTRACTQM, + VEXTRACTWM, + VEXTSB2D, + VEXTSB2Ds, + VEXTSB2W, + VEXTSB2Ws, + VEXTSD2Q, + VEXTSH2D, + VEXTSH2Ds, + VEXTSH2W, + VEXTSH2Ws, + VEXTSW2D, + VEXTSW2Ds, + VMAXFP, + VMAXSB, + VMAXSD, + VMAXSH, + VMAXSW, + VMAXUB, + VMAXUD, + VMAXUH, + VMAXUW, + VMINFP, + VMINSB, + VMINSD, + VMINSH, + VMINSW, + VMINUB, + VMINUD, + VMINUH, + VMINUW, + VMRGEW, + VMRGOW, + VNAND, + VNEGD, + VNEGW, + VNOR, + VOR, + VORC, + VPOPCNTB, + VPOPCNTD, + VPOPCNTH, + VPOPCNTW, + VPRTYBD, + VPRTYBW, + VRLB, + VRLD, + VRLDMI, + VRLDNM, + VRLH, + VRLW, + VRLWMI, + VRLWNM, + VSEL, + VSHASIGMAD, + VSHASIGMAW, + VSLB, + VSLD, + VSLH, + VSLW, + VSRAB, + VSRAD, + VSRAH, + VSRAW, + VSRB, + VSRD, + VSRH, + VSRW, + VSUBCUW, + VSUBSBS, + VSUBSHS, + VSUBSWS, + VSUBUBM, + VSUBUBS, + VSUBUDM, + VSUBUHM, + VSUBUHS, + VSUBUWM, + VSUBUWS, + VXOR, + V_SET0, + V_SET0B, + V_SET0H, + WAIT, + XOR, + XOR8, + XOR8_rec, + XORI, + XORI8, + XORIS, + XORIS8, + XOR_rec, + XSABSDP, + XSABSQP, + XSCMPEQDP, + XSCMPEXPDP, + XSCMPGEDP, + XSCMPGTDP, + XSCMPODP, + XSCMPUDP, + XSCPSGNDP, + XSCPSGNQP, + XSCVHPDP, + XSCVSPDPN, + XSIEXPDP, + XSIEXPQP, + XSMAXCDP, + XSMAXDP, + XSMAXJDP, + XSMINCDP, + XSMINDP, + XSMINJDP, + XSNABSDP, + XSNABSQP, + XSNEGDP, + XSNEGQP, + XSTDIVDP, + XSTSQRTDP, + XSTSTDCDP, + XSTSTDCSP, + XSXEXPDP, + XSXEXPQP, + XSXSIGDP, + XVABSDP, + XVABSSP, + XVCMPEQDP, + XVCMPEQDP_rec, + XVCMPEQSP, + XVCMPEQSP_rec, + XVCMPGEDP, + XVCMPGEDP_rec, + XVCMPGESP, + XVCMPGESP_rec, + XVCMPGTDP, + XVCMPGTDP_rec, + XVCMPGTSP, + XVCMPGTSP_rec, + XVCPSGNDP, + XVCPSGNSP, + XVCVHPSP, + XVIEXPDP, + XVIEXPSP, + XVMAXDP, + XVMAXSP, + XVMINDP, + XVMINSP, + XVNABSDP, + XVNABSSP, + XVNEGDP, + XVNEGSP, + XVTDIVDP, + XVTDIVSP, + XVTLSBB, + XVTSQRTDP, + XVTSQRTSP, + XVTSTDCDP, + XVTSTDCSP, + XVXEXPDP, + XVXEXPSP, + XVXSIGDP, + XVXSIGSP, + XXLAND, + XXLANDC, + XXLEQV, + XXLEQVOnes, + XXLNAND, + XXLNOR, + XXLOR, + XXLORC, + XXLORf, + XXLXOR, + XXLXORdpz, + XXLXORspz, + XXLXORz, + XXSEL))
\ No newline at end of file diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a2664bcff4ab..ba74af5ef5f7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4464,9 +4464,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); StoreSDNode *STN = dyn_cast<StoreSDNode>(N); + MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N); SDValue AddrOp; - if (LDN) - AddrOp = LDN->getOperand(1); + if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT)) + AddrOp = N->getOperand(1); else if (STN) AddrOp = STN->getOperand(2); @@ -5973,6 +5974,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (Type != MVT::v16i8 && Type != MVT::v8i16) break; + // If the alignment for the load is 16 or bigger, we don't need the + // permutated mask to get the required value. The value must be the 0 + // element in big endian target or 7/15 in little endian target in the + // result vsx register of lvx instruction. + // Select the instruction in the .td file. + if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) && + isOffsetMultipleOf(N, 16)) + break; + SDValue ZeroReg = CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO, Subtarget->isPPC64() ? MVT::i64 : MVT::i32); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ec7e30d7e362..8d6edf07bc53 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. - if (CC == ISD::SETEQ || CC == ISD::SETNE) { - return DAG.getNode( - ISD::BITCAST, dl, MVT::v2i64, - DAG.getSetCC(dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC)); - } - - return SDValue(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + SDValue SetCC32 = DAG.getSetCC( + dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC); + int ShuffV[] = {1, 0, 3, 2}; + SDValue Shuff = + DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV); + return DAG.getBitcast( + MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32)); } // We handle most of these in the usual way. @@ -6206,20 +6207,13 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( ArgOffset += PtrByteSize; continue; } - // Copy entire object into memory. There are cases where gcc-generated - // code assumes it is there, even if it could be put entirely into - // registers. (This is not what the doc says.) - - // FIXME: The above statement is likely due to a misunderstanding of the - // documents. All arguments must be copied into the parameter area BY - // THE CALLEE in the event that the callee takes the address of any - // formal argument. That has not yet been implemented. However, it is - // reasonable to use the stack area as a staging area for the register - // load. - - // Skip this for small aggregates, as we will use the same slot for a - // right-justified copy, below. - if (Size >= 8) + // Copy the object to parameter save area if it can not be entirely passed + // by registers. + // FIXME: we only need to copy the parts which need to be passed in + // parameter save area. For the parts passed by registers, we don't need + // to copy them to the stack although we need to allocate space for them + // in parameter save area. + if ((NumGPRs - GPR_idx) * PtrByteSize < Size) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); @@ -17548,14 +17542,14 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) || (ParentOp == ISD::INTRINSIC_VOID))) { unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue(); - assert( - ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) && - "Only the paired load and store (lxvp/stxvp) intrinsics are valid."); - SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2) - : Parent->getOperand(3); - computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG); - FlagSet |= PPC::MOF_Vector; - return FlagSet; + if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) { + SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) + ? Parent->getOperand(2) + : Parent->getOperand(3); + computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG); + FlagSet |= PPC::MOF_Vector; + return FlagSet; + } } // Mark this as something we don't want to handle here if it is atomic diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 2cfd53de3290..c16e146da247 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -393,7 +393,9 @@ public: MachineInstr &NewMI1, MachineInstr &NewMI2) const override; - void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override; + // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI + // and clears nuw, nsw, and exact flags. + void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const; bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index d83ecc699b19..2340be5b5915 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4780,6 +4780,7 @@ class PPCAsmPseudo<string asm, dag iops> def : InstAlias<"sc", (SC 0)>; def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>; +def : InstAlias<"hwsync", (SYNC 0), 0>, Requires<[HasSYNC]>; def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>; def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>; def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d92a10c5b208..110f7d79fbc5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -158,6 +158,11 @@ def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">; def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">; def NoP10Vector: Predicate<"!Subtarget->hasP10Vector()">; +def PPCldsplatAlign16 : PatFrag<(ops node:$ptr), (PPCldsplat node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) && + isOffsetMultipleOf(N, 16); +}]>; + //--------------------- VSX-specific instruction formats ---------------------// // By default, all VSX instructions are to be selected over their Altivec // counter parts and they do not have unmodeled sideeffects. @@ -3180,6 +3185,12 @@ defm : ScalToVecWPermute< v2f64, (f64 (load ForceXForm:$src)), (XXPERMDIs (XFLOADf64 ForceXForm:$src), 2), (SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>; + +// Splat loads. +def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)), + (v8i16 (VSPLTH 7, (LVX ForceXForm:$A)))>; +def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)), + (v16i8 (VSPLTB 15, (LVX ForceXForm:$A)))>; } // HasVSX, NoP9Vector, IsLittleEndian let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in { @@ -3187,6 +3198,12 @@ let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in { (LXVD2X ForceXForm:$src)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst), (STXVD2X $rS, ForceXForm:$dst)>; + + // Splat loads. + def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)), + (v8i16 (VSPLTH 0, (LVX ForceXForm:$A)))>; + def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)), + (v16i8 (VSPLTB 0, (LVX ForceXForm:$A)))>; } // HasVSX, NoP9Vector, IsBigEndian // Any VSX subtarget that only has loads and stores that load in big endian diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 7f63827afbd6..0c7be96a0595 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -413,9 +413,9 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) - for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) - MadeChange |= runOnLoop(*L); + for (Loop *I : *LI) + for (Loop *L : depth_first(I)) + MadeChange |= runOnLoop(L); return MadeChange; } diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def index e4954b722fd0..6b8ad22639c8 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def @@ -153,5 +153,7 @@ FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1, FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR), FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL)) +#include "PPCBack2BackFusion.def" + #undef FUSION_FEATURE #undef FUSION_OP_SET diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 1258a1281597..f11b4e14073e 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -135,6 +135,7 @@ void PPCSubtarget::initializeEnvironment() { HasCompareFusion = false; HasWideImmFusion = false; HasZeroMoveFusion = false; + HasBack2BackFusion = false; IsISA2_06 = false; IsISA2_07 = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index d52833cb1465..1300b62b623a 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -155,6 +155,7 @@ protected: bool HasCompareFusion; bool HasWideImmFusion; bool HasZeroMoveFusion; + bool HasBack2BackFusion; bool IsISA2_06; bool IsISA2_07; bool IsISA3_0; @@ -348,6 +349,7 @@ public: bool hasWideImmFusion() const { return HasWideImmFusion; } bool hasSha3Fusion() const { return HasSha3Fusion; } bool hasZeroMoveFusion() const { return HasZeroMoveFusion; } + bool hasBack2BackFusion() const { return HasBack2BackFusion; } bool needsSwapsForVSXMemOps() const { return hasVSX() && isLittleEndian() && !hasP9Vector(); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 5d6f58a77a39..ed28731b8ef2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) { InstructionCost PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind) { - // Set the max cost if an MMA type is present (v256i1, v512i1). - if (isMMAType(U->getType())) - return InstructionCost::getMax(); - // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) @@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } -bool PPCTTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl<Argument *> &Args) const { +bool PPCTTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef<Type *> &Types) const { // We need to ensure that argument promotion does not // attempt to promote pointers to MMA types (__vector_pair // and __vector_quad) since these types explicitly cannot be // passed as arguments. Both of these types are larger than // the 128-bit Altivec vectors and have a scalar size of 1 bit. - if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; - return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast<PointerType>(A->getType())->getElementType(); - if (EltTy->isSized()) - return (EltTy->isIntOrIntVectorTy(1) && - EltTy->getPrimitiveSizeInBits() > 128); + return llvm::none_of(Types, [](Type *Ty) { + if (Ty->isSized()) + return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128; return false; }); } @@ -1388,3 +1382,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, return false; } + +bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const { + // Only load and stores instructions can have variable vector length on Power. + if (Opcode != Instruction::Load && Opcode != Instruction::Store) + return false; + // Loads/stores with length instructions use bits 0-7 of the GPR operand and + // therefore cannot be used in 32-bit mode. + if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64()) + return false; + if (isa<FixedVectorType>(DataType)) { + unsigned VecWidth = DataType->getPrimitiveSizeInBits(); + return VecWidth == 128; + } + Type *ScalarTy = DataType->getScalarType(); + + if (ScalarTy->isPointerTy()) + return true; + + if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) + return true; + + if (!ScalarTy->isIntegerTy()) + return false; + + unsigned IntWidth = ScalarTy->getIntegerBitWidth(); + return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64; +} + +InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { + InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment, + AddressSpace, CostKind, I); + if (TLI->getValueType(DL, Src, true) == MVT::Other) + return Cost; + // TODO: Handle other cost kinds. + if (CostKind != TTI::TCK_RecipThroughput) + return Cost; + + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + auto *SrcVTy = dyn_cast<FixedVectorType>(Src); + assert(SrcVTy && "Expected a vector type for VP memory operations"); + + if (hasActiveVectorLength(Opcode, Src, Alignment)) { + std::pair<InstructionCost, MVT> LT = + TLI->getTypeLegalizationCost(DL, SrcVTy); + + InstructionCost CostFactor = + vectorCostAdjustmentFactor(Opcode, Src, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + + InstructionCost Cost = LT.first * CostFactor; + assert(Cost.isValid() && "Expected valid cost"); + + // On P9 but not on P10, if the op is misaligned then it will cause a + // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked + // ones. + const Align DesiredAlignment(16); + if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9) + return Cost; + + // Since alignment may be under estimated, we try to compute the probability + // that the actual address is aligned to the desired boundary. For example + // an 8-byte aligned load is assumed to be actually 16-byte aligned half the + // time, while a 4-byte aligned load has a 25% chance of being 16-byte + // aligned. + float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value(); + float MisalignmentProb = 1.0 - AlignmentProb; + return (MisalignmentProb * P9PipelineFlushEstimate) + + (AlignmentProb * *Cost.getValue()); + } + + // Usually we should not get to this point, but the following is an attempt to + // model the cost of legalization. Currently we can only lower intrinsics with + // evl but no mask, on Power 9/10. Otherwise, we must scalarize. + return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); +} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 7aeb0c59d503..0af6f2a308d9 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -134,9 +134,19 @@ public: bool UseMaskForCond = false, bool UseMaskForGaps = false); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl<Argument *> &Args) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef<Type *> &Types) const; + bool hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const; + InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + +private: + // The following constant is used for estimating costs on power9. + static const InstructionCost::CostType P9PipelineFlushEstimate = 80; + /// @} }; |