author     Dimitry Andric <dim@FreeBSD.org>  2021-12-25 22:30:44 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2021-12-25 22:30:44 +0000
commit     77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (patch)
tree       5c0eb39553003b9c75a901af6bc4ddabd6f2f28c /llvm/lib/Target/PowerPC
parent     f65dcba83ce5035ab88a85fe17628b447eb56e1b (diff)
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--  llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp        |   12
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |   29
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.td                            |    4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp                 |    1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBack2BackFusion.def            | 1042
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp               |   14
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp               |   56
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h                    |    4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td                   |    1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td                    |   17
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp          |    6
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMacroFusion.def                |    2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.cpp                  |    1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.h                    |    2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp        |  103
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h          |   16
16 files changed, 1255 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 9e181d4052d6..ded922329ebf 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1576,6 +1576,16 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
std::swap(Operands[2], Operands[1]);
}
+ // Handle base mnemonic for atomic loads where the EH bit is zero.
+ if (Name == "lqarx" || Name == "ldarx" || Name == "lwarx" ||
+ Name == "lharx" || Name == "lbarx") {
+ if (Operands.size() != 5)
+ return false;
+ PPCOperand &EHOp = (PPCOperand &)*Operands[4];
+ if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+ Operands.pop_back();
+ }
+
return false;
}
@@ -1745,7 +1755,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
- if (Op.isImm() && Op.getImm() == ImmVal)
+ if (Op.isU3Imm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 22b948a83c34..d6e02d0d0862 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -368,6 +369,31 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
return new PPCInstPrinter(MAI, MII, MRI, T);
}
+namespace {
+
+class PPCMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit PPCMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ unsigned NumOps = Inst.getNumOperands();
+ if (NumOps == 0 ||
+ Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType !=
+ MCOI::OPERAND_PCREL)
+ return false;
+ Target = Addr + Inst.getOperand(NumOps - 1).getImm() * Size;
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createPPCMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new PPCMCInstrAnalysis(Info);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
for (Target *T : {&getThePPC32Target(), &getThePPC32LETarget(),
&getThePPC64Target(), &getThePPC64LETarget()}) {
@@ -383,6 +409,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createPPCMCInstrAnalysis);
+
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
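
With the analyzer registered, generic MC tools can resolve PC-relative PowerPC branch targets through the MCInstrAnalysis interface. Below is a minimal sketch of that call pattern; the helper and the values fed to it are assumed to come from an ordinary disassembly loop, only evaluateBranch() itself is from this change.

// Illustrative sketch, not part of this change: MIA, Inst, Addr and Size are
// assumed to come from an ordinary MC disassembly loop.
#include <cstdint>
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Support/raw_ostream.h"

static void printBranchTarget(const llvm::MCInstrAnalysis &MIA,
                              const llvm::MCInst &Inst, uint64_t Addr,
                              uint64_t Size) {
  uint64_t Target = 0;
  // For PPC this now succeeds when the last operand is OPERAND_PCREL and
  // yields Addr + Imm * Size, as in the evaluateBranch() override above.
  if (MIA.evaluateBranch(Inst, Addr, Size, Target)) {
    llvm::outs() << "branch target: 0x";
    llvm::outs().write_hex(Target);
    llvm::outs() << "\n";
  }
}
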
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 422bd11dca52..bbd5f5fd1941 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -219,6 +219,10 @@ def FeatureZeroMoveFusion:
SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
"Target supports move to SPR with branch fusion",
[FeatureFusion]>;
+def FeatureBack2BackFusion:
+ SubtargetFeature<"fuse-back2back", "HasBack2BackFusion", "true",
+ "Target supports general back to back fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 16e3b2b85c2e..f26c15667a0b 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -347,7 +347,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// At the moment, all inline asm memory operands are a single register.
// In any case, the output of this routine should always be just one
// assembler operand.
-
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
new file mode 100644
index 000000000000..38ed5f2e78e3
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -0,0 +1,1042 @@
+// Automatically generated file, do not edit!
+//
+// This file defines instruction list for general back2back fusion.
+//===----------------------------------------------------------------------===//
+FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
+ FUSION_OP_SET(ADD4,
+ ADD4O,
+ ADD4TLS,
+ ADD4_rec,
+ ADD8,
+ ADD8O,
+ ADD8TLS,
+ ADD8TLS_,
+ ADD8_rec,
+ ADDE,
+ ADDE8,
+ ADDE8O,
+ ADDEO,
+ ADDEX,
+ ADDEX8,
+ ADDI,
+ ADDI8,
+ ADDIC,
+ ADDIC8,
+ ADDIS,
+ ADDIS8,
+ ADDISdtprelHA32,
+ ADDIStocHA,
+ ADDIStocHA8,
+ ADDIdtprelL32,
+ ADDItlsldLADDR32,
+ ADDItocL,
+ ADDME,
+ ADDME8,
+ ADDME8O,
+ ADDMEO,
+ ADDZE,
+ ADDZE8,
+ ADDZE8O,
+ ADDZEO,
+ AND,
+ AND8,
+ AND8_rec,
+ ANDC,
+ ANDC8,
+ ANDC8_rec,
+ ANDC_rec,
+ ANDI8_rec,
+ ANDIS8_rec,
+ ANDIS_rec,
+ ANDI_rec,
+ AND_rec,
+ CMPB,
+ CMPB8,
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW,
+ CNTLZW8,
+ CNTLZW8_rec,
+ CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW,
+ CNTTZW8,
+ CNTTZW8_rec,
+ CNTTZW_rec,
+ EQV,
+ EQV8,
+ EQV8_rec,
+ EQV_rec,
+ EXTSB,
+ EXTSB8,
+ EXTSB8_32_64,
+ EXTSB8_rec,
+ EXTSB_rec,
+ EXTSH,
+ EXTSH8,
+ EXTSH8_32_64,
+ EXTSH8_rec,
+ EXTSH_rec,
+ EXTSW,
+ EXTSWSLI,
+ EXTSWSLI_32_64,
+ EXTSWSLI_32_64_rec,
+ EXTSWSLI_rec,
+ EXTSW_32,
+ EXTSW_32_64,
+ EXTSW_32_64_rec,
+ EXTSW_rec,
+ FABSD,
+ FABSS,
+ FCPSGND,
+ FCPSGNS,
+ FMR,
+ FNABSD,
+ FNABSS,
+ FNEGD,
+ FNEGS,
+ ISEL,
+ ISEL8,
+ LI,
+ LI8,
+ LIS,
+ LIS8,
+ MFCTR,
+ MFCTR8,
+ MFLR,
+ MFLR8,
+ MFOCRF,
+ MFOCRF8,
+ MFVRD,
+ MFVRWZ,
+ MFVSRD,
+ MFVSRWZ,
+ MTVRD,
+ MTVRWA,
+ MTVRWZ,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRD,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWA,
+ MTVSRWM,
+ MTVSRWZ,
+ NAND,
+ NAND8,
+ NAND8_rec,
+ NAND_rec,
+ NEG,
+ NEG8,
+ NEG8O,
+ NEG8_rec,
+ NEGO,
+ NEG_rec,
+ NOP,
+ NOP_GT_PWR6,
+ NOP_GT_PWR7,
+ NOR,
+ NOR8,
+ NOR8_rec,
+ NOR_rec,
+ OR,
+ OR8,
+ OR8_rec,
+ ORC,
+ ORC8,
+ ORC8_rec,
+ ORC_rec,
+ ORI,
+ ORI8,
+ ORIS,
+ ORIS8,
+ OR_rec,
+ POPCNTB,
+ POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ RLDCL,
+ RLDCL_rec,
+ RLDCR,
+ RLDCR_rec,
+ RLDIC,
+ RLDICL,
+ RLDICL_32,
+ RLDICL_32_64,
+ RLDICL_32_rec,
+ RLDICL_rec,
+ RLDICR,
+ RLDICR_32,
+ RLDICR_rec,
+ RLDIC_rec,
+ RLDIMI,
+ RLDIMI_rec,
+ RLWIMI,
+ RLWIMI8,
+ RLWIMI8_rec,
+ RLWIMI_rec,
+ RLWINM,
+ RLWINM8,
+ RLWINM8_rec,
+ RLWINM_rec,
+ RLWNM,
+ RLWNM8,
+ RLWNM8_rec,
+ RLWNM_rec,
+ SETB,
+ SETB8,
+ SETBC,
+ SETBC8,
+ SETBCR,
+ SETBCR8,
+ SETNBC,
+ SETNBC8,
+ SETNBCR,
+ SETNBCR8,
+ SLD,
+ SLD_rec,
+ SLW,
+ SLW8,
+ SLW8_rec,
+ SLW_rec,
+ SRAD,
+ SRADI,
+ SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRD_rec,
+ SRW,
+ SRW8,
+ SRW8_rec,
+ SRW_rec,
+ SUBF,
+ SUBF8,
+ SUBF8O,
+ SUBF8_rec,
+ SUBFE,
+ SUBFE8,
+ SUBFE8O,
+ SUBFEO,
+ SUBFIC,
+ SUBFIC8,
+ SUBFME,
+ SUBFME8,
+ SUBFME8O,
+ SUBFMEO,
+ SUBFO,
+ SUBFZE,
+ SUBFZE8,
+ SUBFZE8O,
+ SUBFZEO,
+ SUBF_rec,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBM,
+ VADDUBS,
+ VADDUDM,
+ VADDUHM,
+ VADDUHS,
+ VADDUWM,
+ VADDUWS,
+ VAND,
+ VANDC,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB,
+ VCMPEQUB_rec,
+ VCMPEQUD,
+ VCMPEQUD_rec,
+ VCMPEQUH,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB,
+ VCMPGTSB_rec,
+ VCMPGTSD,
+ VCMPGTSD_rec,
+ VCMPGTSH,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW,
+ VCMPGTSW_rec,
+ VCMPGTUB,
+ VCMPGTUB_rec,
+ VCMPGTUD,
+ VCMPGTUD_rec,
+ VCMPGTUH,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW,
+ VCMPGTUW_rec,
+ VCMPNEB,
+ VCMPNEB_rec,
+ VCMPNEH,
+ VCMPNEH_rec,
+ VCMPNEW,
+ VCMPNEW_rec,
+ VCMPNEZB,
+ VCMPNEZB_rec,
+ VCMPNEZH,
+ VCMPNEZH_rec,
+ VCMPNEZW,
+ VCMPNEZW_rec,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEQV,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VEXTSB2D,
+ VEXTSB2Ds,
+ VEXTSB2W,
+ VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D,
+ VEXTSH2Ds,
+ VEXTSH2W,
+ VEXTSH2Ws,
+ VEXTSW2D,
+ VEXTSW2Ds,
+ VMAXFP,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINFP,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ VRLB,
+ VRLD,
+ VRLDMI,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWMI,
+ VRLWNM,
+ VSEL,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBM,
+ VSUBUBS,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUHS,
+ VSUBUWM,
+ VSUBUWS,
+ VXOR,
+ V_SET0,
+ V_SET0B,
+ V_SET0H,
+ XOR,
+ XOR8,
+ XOR8_rec,
+ XORI,
+ XORI8,
+ XORIS,
+ XORIS8,
+ XOR_rec,
+ XSABSDP,
+ XSABSQP,
+ XSCMPEQDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSIEXPDP,
+ XSIEXPQP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVCVHPSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLEQVOnes,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORC,
+ XXLORf,
+ XXLXOR,
+ XXLXORdpz,
+ XXLXORspz,
+ XXLXORz,
+ XXSEL),
+ FUSION_OP_SET(ADD4,
+ ADD4O,
+ ADD4TLS,
+ ADD4_rec,
+ ADD8,
+ ADD8O,
+ ADD8TLS,
+ ADD8TLS_,
+ ADD8_rec,
+ ADDE,
+ ADDE8,
+ ADDE8O,
+ ADDEO,
+ ADDEX,
+ ADDEX8,
+ ADDI,
+ ADDI8,
+ ADDIC,
+ ADDIC8,
+ ADDIS,
+ ADDIS8,
+ ADDISdtprelHA32,
+ ADDIStocHA,
+ ADDIStocHA8,
+ ADDIdtprelL32,
+ ADDItlsldLADDR32,
+ ADDItocL,
+ ADDME,
+ ADDME8,
+ ADDME8O,
+ ADDMEO,
+ ADDZE,
+ ADDZE8,
+ ADDZE8O,
+ ADDZEO,
+ AND,
+ AND8,
+ AND8_rec,
+ ANDC,
+ ANDC8,
+ ANDC8_rec,
+ ANDC_rec,
+ ANDI8_rec,
+ ANDIS8_rec,
+ ANDIS_rec,
+ ANDI_rec,
+ AND_rec,
+ CMPB,
+ CMPB8,
+ CMPD,
+ CMPDI,
+ CMPEQB,
+ CMPLD,
+ CMPLDI,
+ CMPLW,
+ CMPLWI,
+ CMPRB,
+ CMPRB8,
+ CMPW,
+ CMPWI,
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW,
+ CNTLZW8,
+ CNTLZW8_rec,
+ CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW,
+ CNTTZW8,
+ CNTTZW8_rec,
+ CNTTZW_rec,
+ CR6SET,
+ CR6UNSET,
+ CRAND,
+ CRANDC,
+ CREQV,
+ CRNAND,
+ CRNOR,
+ CROR,
+ CRORC,
+ CRSET,
+ CRUNSET,
+ CRXOR,
+ DSS,
+ DSSALL,
+ DST,
+ DST64,
+ DSTST,
+ DSTST64,
+ DSTSTT,
+ DSTSTT64,
+ DSTT,
+ DSTT64,
+ EQV,
+ EQV8,
+ EQV8_rec,
+ EQV_rec,
+ EXTSB,
+ EXTSB8,
+ EXTSB8_32_64,
+ EXTSB8_rec,
+ EXTSB_rec,
+ EXTSH,
+ EXTSH8,
+ EXTSH8_32_64,
+ EXTSH8_rec,
+ EXTSH_rec,
+ EXTSW,
+ EXTSWSLI,
+ EXTSWSLI_32_64,
+ EXTSWSLI_32_64_rec,
+ EXTSWSLI_rec,
+ EXTSW_32,
+ EXTSW_32_64,
+ EXTSW_32_64_rec,
+ EXTSW_rec,
+ FABSD,
+ FABSS,
+ FCMPOD,
+ FCMPOS,
+ FCMPUD,
+ FCMPUS,
+ FCPSGND,
+ FCPSGNS,
+ FMR,
+ FNABSD,
+ FNABSS,
+ FNEGD,
+ FNEGS,
+ FTDIV,
+ FTSQRT,
+ ISEL,
+ ISEL8,
+ LI,
+ LI8,
+ LIS,
+ LIS8,
+ MCRF,
+ MCRXRX,
+ MFCTR,
+ MFCTR8,
+ MFLR,
+ MFLR8,
+ MFOCRF,
+ MFOCRF8,
+ MFVRD,
+ MFVRWZ,
+ MFVSRD,
+ MFVSRWZ,
+ MTCTR,
+ MTCTR8,
+ MTCTR8loop,
+ MTCTRloop,
+ MTLR,
+ MTLR8,
+ MTOCRF,
+ MTOCRF8,
+ MTVRD,
+ MTVRWA,
+ MTVRWZ,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRD,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWA,
+ MTVSRWM,
+ MTVSRWZ,
+ NAND,
+ NAND8,
+ NAND8_rec,
+ NAND_rec,
+ NEG,
+ NEG8,
+ NEG8O,
+ NEG8_rec,
+ NEGO,
+ NEG_rec,
+ NOP,
+ NOP_GT_PWR6,
+ NOP_GT_PWR7,
+ NOR,
+ NOR8,
+ NOR8_rec,
+ NOR_rec,
+ OR,
+ OR8,
+ OR8_rec,
+ ORC,
+ ORC8,
+ ORC8_rec,
+ ORC_rec,
+ ORI,
+ ORI8,
+ ORIS,
+ ORIS8,
+ OR_rec,
+ POPCNTB,
+ POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ RLDCL,
+ RLDCL_rec,
+ RLDCR,
+ RLDCR_rec,
+ RLDIC,
+ RLDICL,
+ RLDICL_32,
+ RLDICL_32_64,
+ RLDICL_32_rec,
+ RLDICL_rec,
+ RLDICR,
+ RLDICR_32,
+ RLDICR_rec,
+ RLDIC_rec,
+ RLDIMI,
+ RLDIMI_rec,
+ RLWIMI,
+ RLWIMI8,
+ RLWIMI8_rec,
+ RLWIMI_rec,
+ RLWINM,
+ RLWINM8,
+ RLWINM8_rec,
+ RLWINM_rec,
+ RLWNM,
+ RLWNM8,
+ RLWNM8_rec,
+ RLWNM_rec,
+ SETB,
+ SETB8,
+ SETBC,
+ SETBC8,
+ SETBCR,
+ SETBCR8,
+ SETNBC,
+ SETNBC8,
+ SETNBCR,
+ SETNBCR8,
+ SLD,
+ SLD_rec,
+ SLW,
+ SLW8,
+ SLW8_rec,
+ SLW_rec,
+ SRAD,
+ SRADI,
+ SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRD_rec,
+ SRW,
+ SRW8,
+ SRW8_rec,
+ SRW_rec,
+ SUBF,
+ SUBF8,
+ SUBF8O,
+ SUBF8_rec,
+ SUBFE,
+ SUBFE8,
+ SUBFE8O,
+ SUBFEO,
+ SUBFIC,
+ SUBFIC8,
+ SUBFME,
+ SUBFME8,
+ SUBFME8O,
+ SUBFMEO,
+ SUBFO,
+ SUBFZE,
+ SUBFZE8,
+ SUBFZE8O,
+ SUBFZEO,
+ SUBF_rec,
+ TD,
+ TDI,
+ TRAP,
+ TW,
+ TWI,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBM,
+ VADDUBS,
+ VADDUDM,
+ VADDUHM,
+ VADDUHS,
+ VADDUWM,
+ VADDUWS,
+ VAND,
+ VANDC,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB,
+ VCMPEQUB_rec,
+ VCMPEQUD,
+ VCMPEQUD_rec,
+ VCMPEQUH,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB,
+ VCMPGTSB_rec,
+ VCMPGTSD,
+ VCMPGTSD_rec,
+ VCMPGTSH,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW,
+ VCMPGTSW_rec,
+ VCMPGTUB,
+ VCMPGTUB_rec,
+ VCMPGTUD,
+ VCMPGTUD_rec,
+ VCMPGTUH,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW,
+ VCMPGTUW_rec,
+ VCMPNEB,
+ VCMPNEB_rec,
+ VCMPNEH,
+ VCMPNEH_rec,
+ VCMPNEW,
+ VCMPNEW_rec,
+ VCMPNEZB,
+ VCMPNEZB_rec,
+ VCMPNEZH,
+ VCMPNEZH_rec,
+ VCMPNEZW,
+ VCMPNEZW_rec,
+ VCMPSQ,
+ VCMPUQ,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEQV,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VEXTSB2D,
+ VEXTSB2Ds,
+ VEXTSB2W,
+ VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D,
+ VEXTSH2Ds,
+ VEXTSH2W,
+ VEXTSH2Ws,
+ VEXTSW2D,
+ VEXTSW2Ds,
+ VMAXFP,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINFP,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ VRLB,
+ VRLD,
+ VRLDMI,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWMI,
+ VRLWNM,
+ VSEL,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBM,
+ VSUBUBS,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUHS,
+ VSUBUWM,
+ VSUBUWS,
+ VXOR,
+ V_SET0,
+ V_SET0B,
+ V_SET0H,
+ WAIT,
+ XOR,
+ XOR8,
+ XOR8_rec,
+ XORI,
+ XORI8,
+ XORIS,
+ XORIS8,
+ XOR_rec,
+ XSABSDP,
+ XSABSQP,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSIEXPDP,
+ XSIEXPQP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSTDIVDP,
+ XSTSQRTDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVCVHPSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTLSBB,
+ XVTSQRTDP,
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLEQVOnes,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORC,
+ XXLORf,
+ XXLXOR,
+ XXLXORdpz,
+ XXLXORspz,
+ XXLXORz,
+ XXSEL))
\ No newline at end of file
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a2664bcff4ab..ba74af5ef5f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4464,9 +4464,10 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
+ MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
SDValue AddrOp;
- if (LDN)
- AddrOp = LDN->getOperand(1);
+ if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
+ AddrOp = N->getOperand(1);
else if (STN)
AddrOp = STN->getOperand(2);
@@ -5973,6 +5974,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (Type != MVT::v16i8 && Type != MVT::v8i16)
break;
+ // If the alignment for the load is 16 or bigger, we don't need the
+ // permuted mask to get the required value. The value must be element 0
+ // on a big endian target, or element 7/15 on a little endian target, in
+ // the resulting VSX register of the lvx instruction.
+ // Select the instruction in the .td file.
+ if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
+ isOffsetMultipleOf(N, 16))
+ break;
+
SDValue ZeroReg =
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ec7e30d7e362..8d6edf07bc53 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (LHS.getValueType() == MVT::v2i64) {
// Equality can be handled by casting to the legal type for Altivec
// comparisons, everything else needs to be expanded.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) {
- return DAG.getNode(
- ISD::BITCAST, dl, MVT::v2i64,
- DAG.getSetCC(dl, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC));
- }
-
- return SDValue();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return SDValue();
+ SDValue SetCC32 = DAG.getSetCC(
+ dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
+ int ShuffV[] = {1, 0, 3, 2};
+ SDValue Shuff =
+ DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
+ return DAG.getBitcast(
+ MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
}
// We handle most of these in the usual way.
@@ -6206,20 +6207,13 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
ArgOffset += PtrByteSize;
continue;
}
- // Copy entire object into memory. There are cases where gcc-generated
- // code assumes it is there, even if it could be put entirely into
- // registers. (This is not what the doc says.)
-
- // FIXME: The above statement is likely due to a misunderstanding of the
- // documents. All arguments must be copied into the parameter area BY
- // THE CALLEE in the event that the callee takes the address of any
- // formal argument. That has not yet been implemented. However, it is
- // reasonable to use the stack area as a staging area for the register
- // load.
-
- // Skip this for small aggregates, as we will use the same slot for a
- // right-justified copy, below.
- if (Size >= 8)
+ // Copy the object to parameter save area if it can not be entirely passed
+ // by registers.
+ // FIXME: we only need to copy the parts which need to be passed in
+ // parameter save area. For the parts passed by registers, we don't need
+ // to copy them to the stack although we need to allocate space for them
+ // in parameter save area.
+ if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
CallSeqStart,
Flags, DAG, dl);
@@ -17548,14 +17542,14 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
(ParentOp == ISD::INTRINSIC_VOID))) {
unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
- assert(
- ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) &&
- "Only the paired load and store (lxvp/stxvp) intrinsics are valid.");
- SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2)
- : Parent->getOperand(3);
- computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
- FlagSet |= PPC::MOF_Vector;
- return FlagSet;
+ if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
+ SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
+ ? Parent->getOperand(2)
+ : Parent->getOperand(3);
+ computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
+ FlagSet |= PPC::MOF_Vector;
+ return FlagSet;
+ }
}
// Mark this as something we don't want to handle here if it is atomic
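
The reworked v2i64 equality path compares as v4i32, swaps adjacent lanes with the {1, 0, 3, 2} shuffle, and ANDs the two results, so a 64-bit lane is all-ones only when both of its 32-bit halves matched. A standalone scalar sketch of that idea for the SETEQ case (plain C++, not LLVM code):

// Scalar model of the v2i64 SETEQ lowering: compare four 32-bit lanes,
// apply the {1, 0, 3, 2} shuffle, then AND, as in LowerSETCC above.
#include <array>
#include <cstdint>
#include <cstdio>

static std::array<uint32_t, 4> setccEqV2i64(const std::array<uint32_t, 4> &A,
                                            const std::array<uint32_t, 4> &B) {
  std::array<uint32_t, 4> Cmp, Out;
  for (int I = 0; I < 4; ++I)
    Cmp[I] = (A[I] == B[I]) ? 0xffffffffu : 0;   // v4i32 SETCC
  static const int ShufV[4] = {1, 0, 3, 2};      // ShuffV from the patch
  for (int I = 0; I < 4; ++I)
    Out[I] = Cmp[ShufV[I]] & Cmp[I];             // AND(Shuff, SetCC32)
  return Out;                                    // bitcast back to v2i64
}

int main() {
  auto R = setccEqV2i64({1, 2, 3, 4}, {1, 9, 3, 4});
  // Lane {0,1} differs in one 32-bit half -> 0; lane {2,3} matches -> ~0.
  std::printf("%08x %08x  %08x %08x\n", R[0], R[1], R[2], R[3]);
  return 0;
}
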
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 2cfd53de3290..c16e146da247 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -393,7 +393,9 @@ public:
MachineInstr &NewMI1,
MachineInstr &NewMI2) const override;
- void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override;
+ // PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
+ // and clears nuw, nsw, and exact flags.
+ void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const;
bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index d83ecc699b19..2340be5b5915 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -4780,6 +4780,7 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"hwsync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>;
def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d92a10c5b208..110f7d79fbc5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -158,6 +158,11 @@ def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
def NoP10Vector: Predicate<"!Subtarget->hasP10Vector()">;
+def PPCldsplatAlign16 : PatFrag<(ops node:$ptr), (PPCldsplat node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
+ isOffsetMultipleOf(N, 16);
+}]>;
+
//--------------------- VSX-specific instruction formats ---------------------//
// By default, all VSX instructions are to be selected over their Altivec
// counter parts and they do not have unmodeled sideeffects.
@@ -3180,6 +3185,12 @@ defm : ScalToVecWPermute<
v2f64, (f64 (load ForceXForm:$src)),
(XXPERMDIs (XFLOADf64 ForceXForm:$src), 2),
(SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
+
+// Splat loads.
+def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v8i16 (VSPLTH 7, (LVX ForceXForm:$A)))>;
+def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v16i8 (VSPLTB 15, (LVX ForceXForm:$A)))>;
} // HasVSX, NoP9Vector, IsLittleEndian
let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in {
@@ -3187,6 +3198,12 @@ let Predicates = [HasVSX, NoP9Vector, IsBigEndian] in {
(LXVD2X ForceXForm:$src)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, ForceXForm:$dst),
(STXVD2X $rS, ForceXForm:$dst)>;
+
+ // Splat loads.
+ def : Pat<(v8i16 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v8i16 (VSPLTH 0, (LVX ForceXForm:$A)))>;
+ def : Pat<(v16i8 (PPCldsplatAlign16 ForceXForm:$A)),
+ (v16i8 (VSPLTB 0, (LVX ForceXForm:$A)))>;
} // HasVSX, NoP9Vector, IsBigEndian
// Any VSX subtarget that only has loads and stores that load in big endian
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 7f63827afbd6..0c7be96a0595 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -413,9 +413,9 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
bool MadeChange = false;
- for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
- for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
- MadeChange |= runOnLoop(*L);
+ for (Loop *I : *LI)
+ for (Loop *L : depth_first(I))
+ MadeChange |= runOnLoop(L);
return MadeChange;
}
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index e4954b722fd0..6b8ad22639c8 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -153,5 +153,7 @@ FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+#include "PPCBack2BackFusion.def"
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
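
PPCBack2BackFusion.def plugs into the existing X-macro scheme: the file that includes PPCMacroFusion.def defines FUSION_FEATURE and FUSION_OP_SET first and receives one expansion per entry, now including the generated back-to-back list. A simplified, hypothetical consumer is sketched below; the real macro definitions in PPCMacroFusion.cpp carry more per-feature data than this name dump.

// Hypothetical, simplified consumer of the .def files; argument handling is
// a stand-in, not the actual macros used by PPCMacroFusion.cpp.
#include <cstdio>

#define FUSION_OP_SET(...) /* opcode lists ignored in this sketch */
#define FUSION_FEATURE(NAME, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)         \
  std::printf("fusion feature %s, gated by PPCSubtarget::%s()\n", #NAME,      \
              #HAS_FEATURE);

void dumpFusionFeatures() {
#include "PPCMacroFusion.def" // pulls in PPCBack2BackFusion.def as well
}
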
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 1258a1281597..f11b4e14073e 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -135,6 +135,7 @@ void PPCSubtarget::initializeEnvironment() {
HasCompareFusion = false;
HasWideImmFusion = false;
HasZeroMoveFusion = false;
+ HasBack2BackFusion = false;
IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index d52833cb1465..1300b62b623a 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -155,6 +155,7 @@ protected:
bool HasCompareFusion;
bool HasWideImmFusion;
bool HasZeroMoveFusion;
+ bool HasBack2BackFusion;
bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
@@ -348,6 +349,7 @@ public:
bool hasWideImmFusion() const { return HasWideImmFusion; }
bool hasSha3Fusion() const { return HasSha3Fusion; }
bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
+ bool hasBack2BackFusion() const { return HasBack2BackFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5d6f58a77a39..ed28731b8ef2 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) {
InstructionCost PPCTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
- // Set the max cost if an MMA type is present (v256i1, v512i1).
- if (isMMAType(U->getType()))
- return InstructionCost::getMax();
-
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -1276,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-bool PPCTTIImpl::areFunctionArgsABICompatible(
- const Function *Caller, const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const {
+bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
+ const Function *Callee,
+ const ArrayRef<Type *> &Types) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
// and __vector_quad) since these types explicitly cannot be
// passed as arguments. Both of these types are larger than
// the 128-bit Altivec vectors and have a scalar size of 1 bit.
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
return false;
- return llvm::none_of(Args, [](Argument *A) {
- auto *EltTy = cast<PointerType>(A->getType())->getElementType();
- if (EltTy->isSized())
- return (EltTy->isIntOrIntVectorTy(1) &&
- EltTy->getPrimitiveSizeInBits() > 128);
+ return llvm::none_of(Types, [](Type *Ty) {
+ if (Ty->isSized())
+ return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
return false;
});
}
@@ -1388,3 +1382,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return false;
}
+
+bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const {
+ // Only load and store instructions can have a variable vector length on Power.
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return false;
+ // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+ // therefore cannot be used in 32-bit mode.
+ if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+ return false;
+ if (isa<FixedVectorType>(DataType)) {
+ unsigned VecWidth = DataType->getPrimitiveSizeInBits();
+ return VecWidth == 128;
+ }
+ Type *ScalarTy = DataType->getScalarType();
+
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+ AddressSpace, CostKind, I);
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return Cost;
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+ assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+ if (hasActiveVectorLength(Opcode, Src, Alignment)) {
+ std::pair<InstructionCost, MVT> LT =
+ TLI->getTypeLegalizationCost(DL, SrcVTy);
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
+ InstructionCost Cost = LT.first * CostFactor;
+ assert(Cost.isValid() && "Expected valid cost");
+
+ // On P9 but not on P10, if the op is misaligned then it will cause a
+ // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
+ // ones.
+ const Align DesiredAlignment(16);
+ if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
+ return Cost;
+
+ // Since alignment may be underestimated, we try to compute the probability
+ // that the actual address is aligned to the desired boundary. For example
+ // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
+ // time, while a 4-byte aligned load has a 25% chance of being 16-byte
+ // aligned.
+ float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
+ float MisalignmentProb = 1.0 - AlignmentProb;
+ return (MisalignmentProb * P9PipelineFlushEstimate) +
+ (AlignmentProb * *Cost.getValue());
+ }
+
+ // Usually we should not get to this point, but the following is an attempt to
+ // model the cost of legalization. Currently we can only lower intrinsics with
+ // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
+ return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
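
On Power9, the VP memory-op cost above blends the pipeline-flush estimate with the base cost according to the estimated probability that the access is really 16-byte aligned. A small worked example with assumed numbers (a base legalized cost of 2 and a declared 4-byte alignment):

// Worked example of the P9 misalignment blending above; BaseCost is an
// assumed stand-in for LT.first * CostFactor.
#include <cstdio>

int main() {
  const float P9PipelineFlushEstimate = 80.0f; // constant added by the patch
  const float BaseCost = 2.0f;                 // assumed legalized op cost
  const float Alignment = 4.0f, Desired = 16.0f;
  const float AlignmentProb = Alignment / Desired;     // 0.25
  const float MisalignmentProb = 1.0f - AlignmentProb; // 0.75
  // 0.75 * 80 + 0.25 * 2 = 60.5: the possible flush dominates the estimate.
  std::printf("estimated cost: %.1f\n",
              MisalignmentProb * P9PipelineFlushEstimate +
                  AlignmentProb * BaseCost);
  return 0;
}
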
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 7aeb0c59d503..0af6f2a308d9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -134,9 +134,19 @@ public:
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- bool areFunctionArgsABICompatible(const Function *Caller,
- const Function *Callee,
- SmallPtrSetImpl<Argument *> &Args) const;
+ bool areTypesABICompatible(const Function *Caller, const Function *Callee,
+ const ArrayRef<Type *> &Types) const;
+ bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
+ Align Alignment) const;
+ InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+private:
+ // The following constant is used for estimating costs on power9.
+ static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
/// @}
};