Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp          |  10
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp    |   6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp |   6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp     |  25
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp       |   1
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp          |   4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h            |  14
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp |  4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp |  2
-rw-r--r--  lib/Target/PowerPC/P9InstrResources.td                 |   8
-rw-r--r--  lib/Target/PowerPC/PPC.h                               |   8
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp                   | 482
-rw-r--r--  lib/Target/PowerPC/PPCBranchCoalescing.cpp             |  13
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp               |  29
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp                     |  41
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp                |  71
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h                  |  11
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp                 | 105
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp                 | 546
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h                   |  49
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td                    |   4
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td                  |  12
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td                  |   9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp                    | 336
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h                      |  42
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td                     | 206
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td                      | 180
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp               | 670
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp                  |  23
-rw-r--r--  lib/Target/PowerPC/PPCMIPeephole.cpp                   |  82
-rw-r--r--  lib/Target/PowerPC/PPCPreEmitPeephole.cpp              | 106
-rw-r--r--  lib/Target/PowerPC/PPCQPXLoadSplat.cpp                 |   6
-rw-r--r--  lib/Target/PowerPC/PPCReduceCRLogicals.cpp             |  15
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp                 |  39
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td                  |  22
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp                    |  18
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h                      |  24
-rw-r--r--  lib/Target/PowerPC/PPCTLSDynamicCall.cpp               |   4
-rw-r--r--  lib/Target/PowerPC/PPCTOCRegDeps.cpp                   |   9
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp                |  38
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp          |  68
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h            |  12
-rw-r--r--  lib/Target/PowerPC/PPCVSXCopy.cpp                      |   6
-rw-r--r--  lib/Target/PowerPC/PPCVSXFMAMutate.cpp                 |  18
-rw-r--r--  lib/Target/PowerPC/PPCVSXSwapRemoval.cpp               |  32
45 files changed, 2172 insertions(+), 1244 deletions(-)
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index c9524da93acd..aedf5b713c3f 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -579,7 +579,7 @@ public:
static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Token);
+ auto Op = std::make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -608,7 +608,7 @@ public:
static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Immediate);
+ auto Op = std::make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -618,7 +618,7 @@ public:
static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(Expression);
+ auto Op = std::make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -629,7 +629,7 @@ public:
static std::unique_ptr<PPCOperand>
CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(TLSRegister);
+ auto Op = std::make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -639,7 +639,7 @@ public:
static std::unique_ptr<PPCOperand>
CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- auto Op = make_unique<PPCOperand>(ContextImmediate);
+ auto Op = std::make_unique<PPCOperand>(ContextImmediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
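These hunks are part of the tree-wide switch from llvm::make_unique to std::make_unique once LLVM's baseline became C++14. A minimal sketch of the pattern; the Operand type here is hypothetical, not the PPCOperand from the patch:

  #include <memory>

  struct Operand {
    int Kind;
    explicit Operand(int K) : Kind(K) {}
  };

  std::unique_ptr<Operand> create(int K) {
    // std::make_unique allocates the object and forwards the constructor
    // arguments, exactly as llvm::make_unique did before its removal.
    auto Op = std::make_unique<Operand>(K);
    return Op;
  }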
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 7a8af57961cb..3597fd15eeb1 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -167,12 +167,6 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, QFRegs);
}
-static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, RRegs);
-}
-
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 042ddf48d5df..20f752c3041a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
// determine the type of the relocation
unsigned Type;
if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
@@ -131,7 +131,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
break;
}
} else {
- switch ((unsigned)Fixup.getKind()) {
+ switch (Fixup.getTargetKind()) {
default: llvm_unreachable("invalid fixup kind!");
case FK_NONE:
Type = ELF::R_PPC_NONE;
@@ -443,5 +443,5 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) {
- return llvm::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
+ return std::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
}
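MCFixup::getTargetKind() returns the fixup kind as a plain unsigned, which lets target-specific kinds such as PPC::fixup_ppc_br24 appear directly as case labels without the old (unsigned)Fixup.getKind() cast. A hedged sketch of the idiom; the helper name and the single case shown are illustrative only:

  static unsigned classifyPCRel(const MCFixup &Fixup) {
    switch (Fixup.getTargetKind()) { // plain unsigned: no cast for PPC kinds
    case PPC::fixup_ppc_br24:
      return ELF::R_PPC_REL24;
    default:
      llvm_unreachable("Unimplemented");
    }
  }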
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 0e64ae55ab1c..7fc231618fa9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -66,6 +66,31 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
+ // Customize printing of the addis instruction on AIX. When an operand is a
+ // symbol reference, the instruction syntax is changed to look like a load
+ // operation, i.e:
+ // Transform: addis $rD, $rA, $src --> addis $rD, $src($rA).
+ if (TT.isOSAIX() &&
+ (MI->getOpcode() == PPC::ADDIS8 || MI->getOpcode() == PPC::ADDIS) &&
+ MI->getOperand(2).isExpr()) {
+ assert((MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) &&
+ "The first and the second operand of an addis instruction"
+ " should be registers.");
+
+ assert(isa<MCSymbolRefExpr>(MI->getOperand(2).getExpr()) &&
+ "The third operand of an addis instruction should be a symbol "
+ "reference expression if it is an expression at all.");
+
+ O << "\taddis ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+ O << "(";
+ printOperand(MI, 1, O);
+ O << ")";
+ return;
+ }
+
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
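Sketching the transform the new AIX special case performs, with a hypothetical TOC label L..C0 and registers 3 and 2:

  default syntax:  addis 3, 2, L..C0    # $rD, $rA, $src
  AIX syntax:      addis 3, L..C0(2)    # $rD, $src($rA), load-like form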
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 5f0005ea1d7b..1216cd727289 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -86,4 +86,5 @@ void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+ ZeroDirective = "\t.space\t";
}
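With ZeroDirective set, runs of zero bytes are emitted through .space, which the AIX assembler accepts, rather than .zero, the usual ELF spelling. For a hypothetical 16-byte zero-initialized object the streamer would now print:

  .space  16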
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index d467f5c4a439..fb9dd5d7aa75 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -19,8 +19,8 @@ using namespace llvm;
const PPCMCExpr*
PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+ bool IsDarwin, MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin);
}
void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 449e2c34f74d..ad1454566162 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -45,21 +45,21 @@ public:
/// @{
static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx);
+ bool IsDarwin, MCContext &Ctx);
static const PPCMCExpr *createLo(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_LO, Expr, IsDarwin, Ctx);
}
static const PPCMCExpr *createHi(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HI, Expr, IsDarwin, Ctx);
}
static const PPCMCExpr *createHa(const MCExpr *Expr,
- bool isDarwin, MCContext &Ctx) {
- return create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ bool IsDarwin, MCContext &Ctx) {
+ return create(VK_PPC_HA, Expr, IsDarwin, Ctx);
}
/// @}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 4cf7fd15fa75..672f910ab086 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout,
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
// On Mach-O, ppc_fixup_half16 relocations must refer to the
// start of the instruction, not the second halfword, as ELF does
- if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ if (Fixup.getTargetKind() == PPC::fixup_ppc_half16)
FixupOffset &= ~uint32_t(3);
return FixupOffset;
}
@@ -376,5 +376,5 @@ void PPCMachObjectWriter::RecordPPCRelocation(
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
- return llvm::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
+ return std::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 9c661286d455..7fdbb8990b55 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -25,5 +25,5 @@ PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
- return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+ return std::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
}
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index 2a10322d3f49..f6cd8ed00c82 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -64,6 +64,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
XXLAND,
XXLANDC,
XXLEQV,
+ XXLEQVOnes,
XXLNAND,
XXLNOR,
XXLOR,
@@ -124,8 +125,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "SRAD(I)?$"),
(instregex "EXTSWSLI_32_64$"),
(instregex "MFV(S)?RD$"),
- (instregex "MTVSRD$"),
- (instregex "MTVSRW(A|Z)$"),
+ (instregex "MTV(S)?RD$"),
+ (instregex "MTV(S)?RW(A|Z)$"),
(instregex "CMP(WI|LWI|W|LW)(8)?$"),
(instregex "CMP(L)?D(I)?$"),
(instregex "SUBF(I)?C(8)?$"),
@@ -148,7 +149,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
(instregex "ADD(4|8)(TLS)?(_)?$"),
(instregex "NEG(8)?$"),
- (instregex "ADDI(S)?toc(HA|L)$"),
+ (instregex "ADDI(S)?toc(HA|L)(8)?$"),
COPY,
MCRF,
MCRXRX,
@@ -158,6 +159,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
XSNEGDP,
XSCPSGNDP,
MFVSRWZ,
+ MFVRWZ,
EXTSWSLI,
SRADI_32,
RLDIC,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c6951ab67b08..0534773c4c9e 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -50,10 +50,10 @@ namespace llvm {
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool isDarwin);
+ AsmPrinter &AP, bool IsDarwin);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &OutMO, AsmPrinter &AP,
- bool isDarwin);
+ bool IsDarwin);
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
@@ -86,8 +86,8 @@ namespace llvm {
MO_NO_FLAG,
/// On a symbol operand "FOO", this indicates that the reference is actually
- /// to "FOO@plt". This is used for calls and jumps to external functions on
- /// for PIC calls on Linux and ELF systems.
+ /// to "FOO@plt". This is used for calls and jumps to external functions
+ /// and for PIC calls on 32-bit ELF systems.
MO_PLT = 1,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bd87ce06b4fb..66236b72a1a3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -51,9 +51,11 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -76,7 +78,7 @@ namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
- MapVector<MCSymbol *, MCSymbol *> TOC;
+ MapVector<const MCSymbol *, MCSymbol *> TOC;
const PPCSubtarget *Subtarget;
StackMaps SM;
@@ -87,7 +89,7 @@ public:
StringRef getPassName() const override { return "PowerPC Assembly Printer"; }
- MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+ MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym);
bool doInitialization(Module &M) override {
if (!TOC.empty())
@@ -164,6 +166,14 @@ public:
: PPCAsmPrinter(TM, std::move(Streamer)) {}
StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+
+ void SetupMachineFunction(MachineFunction &MF) override;
+
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
+
+ void EmitFunctionDescriptor() override;
+
+ void EmitEndOfAsmFile(Module &) override;
};
} // end anonymous namespace
@@ -265,7 +275,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return true;
// This operand uses VSX numbering.
// If the operand is a VMX register, convert it to a VSX register.
- unsigned Reg = MI->getOperand(OpNo).getReg();
+ Register Reg = MI->getOperand(OpNo).getReg();
if (PPCInstrInfo::isVRRegister(Reg))
Reg = PPC::VSX32 + (Reg - PPC::V0);
else if (PPCInstrInfo::isVFRegister(Reg))
@@ -328,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
-MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) {
MCSymbol *&TOCEntry = TOC[Sym];
if (!TOCEntry)
TOCEntry = createTempSymbol("C");
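The lookup relies on MapVector::operator[] inserting a value-initialized (null) entry on first access, so each symbol gets exactly one TOC label. The same idiom in isolation, using a hypothetical cache rather than the TOC map itself:

  llvm::MapVector<const MCSymbol *, MCSymbol *> Cache;

  MCSymbol *lookUpOrCreate(const MCSymbol *Sym, MCContext &Ctx) {
    MCSymbol *&Entry = Cache[Sym];      // inserts nullptr on first access
    if (!Entry)
      Entry = Ctx.createTempSymbol();   // created once, reused afterwards
    return Entry;
  }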
@@ -378,7 +388,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
- unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
@@ -502,13 +512,32 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
.addExpr(SymVar));
}
+/// Map a machine operand for a TOC pseudo-machine instruction to its
+/// corresponding MCSymbol.
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
+ AsmPrinter &AP) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ return AP.getSymbol(MO.getGlobal());
+ case MachineOperand::MO_ConstantPoolIndex:
+ return AP.GetCPISymbol(MO.getIndex());
+ case MachineOperand::MO_JumpTableIndex:
+ return AP.GetJTISymbol(MO.getIndex());
+ case MachineOperand::MO_BlockAddress:
+ return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+ default:
+ llvm_unreachable("Unexpected operand type to get symbol.");
+ }
+}
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- bool isPPC64 = Subtarget->isPPC64();
- bool isDarwin = TM.getTargetTriple().isOSDarwin();
+ const bool IsDarwin = TM.getTargetTriple().isOSDarwin();
+ const bool IsPPC64 = Subtarget->isPPC64();
+ const bool IsAIX = Subtarget->isAIXABI();
const Module *M = MF->getFunction().getParent();
PICLevel::Level PL = M->getPICLevel();
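The TOC-pseudo cases below all funnel their symbol operand through the new helper; the common shape, simplified from the LDtoc handling further down, is:

  const MachineOperand &MO = MI->getOperand(1);
  const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
  MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);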
@@ -517,7 +546,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!MI->isInlineAsm()) {
for (const MachineOperand &MO: MI->operands()) {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Subtarget->hasSPE()) {
if (PPC::F4RCRegClass.contains(Reg) ||
PPC::F8RCRegClass.contains(Reg) ||
@@ -595,7 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
// addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
@@ -646,43 +675,57 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
case PPC::LWZtoc: {
- // Transform %r3 = LWZtoc @min1, %r2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct.");
+
+ // Transform %rN = LWZtoc @op1, %r2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LWZ, and the global address operand to be a
- // reference to the GOT entry we will synthesize later.
+ // Change the opcode to LWZ.
TmpInst.setOpcode(PPC::LWZ);
+
const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for LWZtoc.");
- // Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
- MCSymbol *MOSymbol = nullptr;
- if (MO.isGlobal())
- MOSymbol = getSymbol(MO.getGlobal());
- else if (MO.isCPI())
- MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
- MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
-
- if (PL == PICLevel::SmallPIC) {
+ // Map the operand to its corresponding MCSymbol.
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Create a reference to the GOT entry for the symbol. The GOT entry will be
+ // synthesized later.
+ if (PL == PICLevel::SmallPIC && !IsAIX) {
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT,
OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
- } else {
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
- const MCExpr *Exp =
- MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None,
- OutContext);
- const MCExpr *PB =
- MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
- OutContext);
- Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+ // Otherwise, use the TOC. 'TOCEntry' is a label used to reference the
+ // storage allocated in the TOC which contains the address of
+ // 'MOSymbol'. Said TOC entry will be synthesized later.
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext);
+
+ // AIX uses the label directly as the lwz displacement operand for
+ // references into the toc section. The displacement value will be generated
+ // relative to the toc-base.
+ if (IsAIX) {
+ assert(
+ TM.getCodeModel() == CodeModel::Small &&
+ "This pseudo should only be selected for 32-bit small code model.");
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
}
+
+ // Create an explicit subtract expression between the local symbol and
+ // '.LTOC' to manifest the toc-relative offset.
+ const MCExpr *PB = MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext);
+ Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
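The three exits of this case produce three different displacement operands. Roughly, for a hypothetical symbol (register numbers and labels are illustrative, and the exact printed forms may differ):

  ELF small PIC:   lwz 3, sym@got(30)       # GOT entry, synthesized later
  AIX small model: lwz 3, L..C0(2)          # toc-relative displacement
  ELF otherwise:   lwz 3, .LC0-.LTOC(30)    # explicit offset from .LTOC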
@@ -690,72 +733,121 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDtocCPT:
case PPC::LDtocBA:
case PPC::LDtoc: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
// Transform %x3 = LDtoc @min1, %x2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LD, and the global address operand to be a
- // reference to the TOC entry we will synthesize later.
+ // Change the opcode to LD.
TmpInst.setOpcode(PPC::LD);
+
const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand!");
+
+ // Map the machine operand to its corresponding MCSymbol, then map the
+ // global address operand to be a reference to the TOC entry we will
+ // synthesize later.
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
+ const MCExpr *Exp =
+ MCSymbolRefExpr::create(TOCEntry, VK, OutContext);
+ TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDIStocHA: {
+ assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) &&
+ "This pseudo should only be selected for 32-bit large code model on"
+ " AIX.");
+
+ // Transform %rd = ADDIStocHA %rA, @sym(%r2)
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
- MCSymbol *MOSymbol = nullptr;
- if (MO.isGlobal())
- MOSymbol = getSymbol(MO.getGlobal());
- else if (MO.isCPI())
- MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
- MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ // Change the opcode to ADDIS.
+ TmpInst.setOpcode(PPC::ADDIS);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for ADDIStocHA.");
+
+ // Map the machine operand to its corresponding MCSymbol.
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Always use TOC on AIX. Map the global address operand to be a reference
+ // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+ // reference the storage allocated in the TOC which contains the address of
+ // 'MOSymbol'.
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+ MCSymbolRefExpr::VK_PPC_U,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtocL: {
+ assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large &&
+ "This pseudo should only be selected for 32-bit large code model on"
+ " AIX.");
- const MCExpr *Exp =
- MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
- OutContext);
+ // Transform %rd = LWZtocL @sym, %rs.
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+
+ // Change the opcode to lwz.
+ TmpInst.setOpcode(PPC::LWZ);
+
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for LWZtocL.");
+
+ // Map the machine operand to its corresponding MCSymbol.
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ // Always use TOC on AIX. Map the global address operand to be a reference
+ // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+ // reference the storage allocated in the TOC which contains the address of
+ // 'MOSymbol'.
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+ MCSymbolRefExpr::VK_PPC_L,
+ OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
+ case PPC::ADDIStocHA8: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
- case PPC::ADDIStocHA: {
- // Transform %xd = ADDIStocHA %x2, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ // Transform %xd = ADDIStocHA8 %x2, @sym
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to ADDIS8. If the global address is external, has
- // common linkage, is a non-local function address, or is a jump table
- // address, then generate a TOC entry and reference that. Otherwise
- // reference the symbol directly.
+ // Change the opcode to ADDIS8. If the global address is the address of
+ // an external symbol, is a jump table address, is a block address, or is a
+ // constant pool index with large code model enabled, then generate a TOC
+ // entry and reference that. Otherwise, reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
+
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
- MO.isBlockAddress()) &&
- "Invalid operand for ADDIStocHA!");
- MCSymbol *MOSymbol = nullptr;
- bool GlobalToc = false;
-
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- MOSymbol = getSymbol(GV);
- unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
- } else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- } else if (MO.isJTI()) {
- MOSymbol = GetJTISymbol(MO.getIndex());
- } else if (MO.isBlockAddress()) {
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
- }
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+ "Invalid operand for ADDIStocHA8!");
+
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+ const bool GlobalToc =
+ MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
- TM.getCodeModel() == CodeModel::Large)
+ (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
+
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
if (!MO.isJTI() && MO.getOffset())
Exp = MCBinaryExpr::createAdd(Exp,
@@ -768,73 +860,59 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::LDtocL: {
+ assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
// Transform %xd = LDtocL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to LD. If the global address is external, has
- // common linkage, or is a jump table address, then reference the
- // associated TOC entry. Otherwise reference the symbol directly.
+ // Change the opcode to LD. If the global address is the address of
+ // an external symbol, is a jump table address, is a block address, or is
+ // a constant pool index with large code model enabled, then generate a
+ // TOC entry and reference that. Otherwise, reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
+
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
MO.isBlockAddress()) &&
"Invalid operand for LDtocL!");
- MCSymbol *MOSymbol = nullptr;
- if (MO.isJTI())
- MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else if (MO.isBlockAddress()) {
- MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
- else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- if (TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
- else if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- MOSymbol = getSymbol(GV);
- LLVM_DEBUG(
- unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- assert((GVFlags & PPCII::MO_NLP_FLAG) &&
- "LDtocL used on symbol that could be accessed directly is "
- "invalid. Must match ADDIStocHA."));
+ LLVM_DEBUG(assert(
+ (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+ "LDtocL used on symbol that could be accessed directly is "
+ "invalid. Must match ADDIStocHA8."));
+
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+ if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
- }
+ const MCSymbolRefExpr::VariantKind VK =
+ IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO;
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
// Transform %xd = ADDItocL %xs, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
- // Change the opcode to ADDI8. If the global address is external, then
- // generate a TOC entry and reference that. Otherwise reference the
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise, reference the
// symbol directly.
TmpInst.setOpcode(PPC::ADDI8);
+
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
- MCSymbol *MOSymbol = nullptr;
-
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
- assert(!(GVFlags & PPCII::MO_NLP_FLAG) &&
- "Interposable definitions must use indirect access."));
- MOSymbol = getSymbol(GV);
- } else if (MO.isCPI()) {
- MOSymbol = GetCPISymbol(MO.getIndex());
- }
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL.");
+
+ LLVM_DEBUG(assert(
+ !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+ "Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
- OutContext);
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
+ MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -842,13 +920,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDISgotTprelHA: {
// Transform: %xd = ADDISgotTprelHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
- OutContext);
+ MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+ OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -858,16 +936,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %xd = LDgotTprelL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
// Change the opcode to LD.
- TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
+ TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *Exp =
- MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
- OutContext);
+ const MCExpr *Exp = MCSymbolRefExpr::create(
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TPREL,
+ OutContext);
TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -920,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStlsgdHA: {
// Transform: %xd = ADDIStlsgdHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -943,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create(
- MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
- : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
@@ -965,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStlsldHA: {
// Transform: %xd = ADDIStlsldHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha
- assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+ assert(IsPPC64 && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -988,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create(
- MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
- : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+ : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
@@ -1021,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext);
EmitToStreamer(
*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ MCInstBuilder(IsPPC64 ? PPC::ADDIS8 : PPC::ADDIS)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -1040,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
EmitToStreamer(*OutStreamer,
- MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -1087,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!Subtarget->isDarwin()) {
+ if (!IsDarwin) {
unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
const MachineOperand &MO = MI->getOperand(OpNum);
if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
@@ -1098,7 +1177,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
EmitToStreamer(*OutStreamer, TmpInst);
}
@@ -1368,15 +1447,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
- for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
- E = TOC.end(); I != E; ++I) {
- OutStreamer->EmitLabel(I->second);
- MCSymbol *S = I->first;
+ for (const auto &TOCMapPair : TOC) {
+ const MCSymbol *const TOCEntryTarget = TOCMapPair.first;
+ MCSymbol *const TOCEntryLabel = TOCMapPair.second;
+
+ OutStreamer->EmitLabel(TOCEntryLabel);
if (isPPC64) {
- TS.emitTCEntry(*S);
+ TS.emitTCEntry(*TOCEntryTarget);
} else {
OutStreamer->EmitValueToAlignment(4);
- OutStreamer->EmitSymbolValue(S, 4);
+ OutStreamer->EmitSymbolValue(TOCEntryTarget, 4);
}
}
}
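For a hypothetical 32-bit ELF TOC entry mapping label .LC0 to symbol foo, this loop would emit roughly the following into .got2 (directive spellings depend on the streamer):

  .section .got2,"aw",@progbits
  .p2align 2
  .LC0:
  .long foo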
@@ -1602,7 +1682,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(isPPC64 ? 3 : 2);
+ EmitAlignment(isPPC64 ? Align(8) : Align(4));
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
@@ -1643,6 +1723,106 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ // Get the function descriptor symbol.
+ CurrentFnDescSym = getSymbol(&MF.getFunction());
+ // Set the containing csect.
+ MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection(
+ CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+ XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->setContainingCsect(FnDescSec);
+
+ return AsmPrinter::SetupMachineFunction(MF);
+}
+
+void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ // Early error checking limiting what is supported.
+ if (GV->isThreadLocal())
+ report_fatal_error("Thread local not yet supported on AIX.");
+
+ if (GV->hasSection())
+ report_fatal_error("Custom section for Data not yet supported.");
+
+ if (GV->hasComdat())
+ report_fatal_error("COMDAT not yet supported by AIX.");
+
+ SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
+ if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData())
+ report_fatal_error("Encountered a global variable kind that is "
+ "not supported yet.");
+
+ // Create the containing csect and switch to it.
+ MCSectionXCOFF *CSect = cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
+ OutStreamer->SwitchSection(CSect);
+
+ // Create the symbol, set its storage class, and emit it.
+ MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+ GVSym->setStorageClass(
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
+ GVSym->setContainingCsect(CSect);
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+
+ // Handle common symbols.
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+ unsigned Align =
+ GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV);
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+
+ if (GVKind.isBSSLocal())
+ OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+ else
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ MCSymbol *EmittedInitSym = GVSym;
+ EmitLinkage(GV, EmittedInitSym);
+ EmitAlignment(getGVAlignment(GV, DL), GV);
+ OutStreamer->EmitLabel(EmittedInitSym);
+ EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+}
+
+void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
+ const DataLayout &DL = getDataLayout();
+ const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4;
+
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ // Emit function descriptor.
+ OutStreamer->SwitchSection(
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->getContainingCsect());
+ OutStreamer->EmitLabel(CurrentFnDescSym);
+ // Emit function entry point address.
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
+ PointerSize);
+ // Emit TOC base address.
+ MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
+ PointerSize);
+ // Emit a null environment pointer.
+ OutStreamer->EmitIntValue(0, PointerSize);
+
+ OutStreamer->SwitchSection(Current.first, Current.second);
+}
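On AIX a function descriptor is three pointers: the entry point, the TOC base, and an environment pointer. For a hypothetical 32-bit function foo this emits roughly:

  .csect foo[DS]
  foo:
  .long .foo        # function entry point
  .long TOC[TC0]    # TOC base address
  .long 0           # environment pointer (null)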
+
+void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ // If there are no functions in this module, we will never need to reference
+ // the TOC base.
+ if (M.empty())
+ return;
+
+ // Emit TOC base.
+ MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+ MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
+ StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+ SectionKind::getData());
+ cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+ // Switch to section to emit TOC base.
+ OutStreamer->SwitchSection(TOCBaseSection);
+}
+
+
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
/// for a MachineFunction to the given output stream, in a format that the
/// Darwin assembler can deal with.
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 5e9a661f8f0b..d325b078979f 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -340,9 +340,10 @@ bool PPCBranchCoalescing::identicalOperands(
if (Op1.isIdenticalTo(Op2)) {
// filter out instructions with physical-register uses
- if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg())
- // If the physical register is constant then we can assume the value
- // has not changed between uses.
+ if (Op1.isReg() &&
+ Register::isPhysicalRegister(Op1.getReg())
+ // If the physical register is constant then we can assume the value
+ // has not changed between uses.
&& !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) {
LLVM_DEBUG(dbgs() << "The operands are not provably identical.\n");
return false;
@@ -355,8 +356,8 @@ bool PPCBranchCoalescing::identicalOperands(
// definition of the register produces the same value. If they produce the
// same value, consider them to be identical.
if (Op1.isReg() && Op2.isReg() &&
- TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
- TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+ Register::isVirtualRegister(Op1.getReg()) &&
+ Register::isVirtualRegister(Op2.getReg())) {
MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
@@ -456,7 +457,7 @@ bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI,
<< TargetMBB.getNumber() << "\n");
for (auto &Use : MI.uses()) {
- if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+ if (Use.isReg() && Register::isVirtualRegister(Use.getReg())) {
MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
LLVM_DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
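Register is a lightweight wrapper over the raw register number, and its static helpers replace the old TargetRegisterInfo ones used throughout this file. A self-contained sketch:

  #include "llvm/CodeGen/Register.h"
  using llvm::Register;

  // Virtual register numbers occupy a distinct (high-bit-set) range, so the
  // check is a simple range test rather than a TargetRegisterInfo query.
  bool bothVirtual(Register A, Register B) {
    return Register::isVirtualRegister(A) && Register::isVirtualRegister(B);
  }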
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 793d690baec3..cdff4d383d23 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -81,21 +81,20 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
/// original Offset.
unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
unsigned Offset) {
- unsigned Align = MBB.getAlignment();
- if (!Align)
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment == Align::None())
return 0;
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
+ const Align ParentAlign = MBB.getParent()->getAlignment();
- if (Align <= ParentAlign)
- return OffsetToAlignment(Offset, AlignAmt);
+ if (Alignment <= ParentAlign)
+ return offsetToAlignment(Offset, Alignment);
// The alignment of this MBB is larger than the function's alignment, so we
// can't tell whether or not it will insert nops. Assume that it will.
if (FirstImpreciseBlock < 0)
FirstImpreciseBlock = MBB.getNumber();
- return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+ return Alignment.value() + offsetToAlignment(Offset, Alignment);
}
/// We need to be careful about the offset of the first block in the function
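offsetToAlignment returns how many pad bytes are needed to reach the next boundary, so with the Align type the old log2 bookkeeping (1 << Align) disappears. A small worked example:

  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  unsigned padFor(uint64_t Offset) {
    // padFor(13) == 3: three pad bytes move offset 13 to the next
    // 8-byte boundary at 16.
    return offsetToAlignment(Offset, Align(8));
  }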
@@ -179,7 +178,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
const MachineBasicBlock *Dest,
unsigned BrOffset) {
int BranchSize;
- unsigned MaxAlign = 2;
+ Align MaxAlign = Align(4);
bool NeedExtraAdjustment = false;
if (Dest->getNumber() <= Src->getNumber()) {
// If this is a backwards branch, the delta is the offset from the
@@ -192,8 +191,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
BranchSize += BlockSizes[DestBlock].first;
for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
BranchSize += BlockSizes[i].first;
- MaxAlign = std::max(MaxAlign,
- Fn.getBlockNumbered(i)->getAlignment());
+ MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
}
NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -207,8 +205,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
MaxAlign = std::max(MaxAlign, Dest->getAlignment());
for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
BranchSize += BlockSizes[i].first;
- MaxAlign = std::max(MaxAlign,
- Fn.getBlockNumbered(i)->getAlignment());
+ MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
}
NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -258,7 +255,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
// The computed offset is at most ((1 << alignment) - 4) bytes smaller
// than actual offset. So we add this number to the offset for safety.
if (NeedExtraAdjustment)
- BranchSize += (1 << MaxAlign) - 4;
+ BranchSize += MaxAlign.value() - 4;
return BranchSize;
}
@@ -339,16 +336,16 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// 1. CR register
// 2. Target MBB
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
- unsigned CRReg = I->getOperand(1).getReg();
+ Register CRReg = I->getOperand(1).getReg();
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
} else if (I->getOpcode() == PPC::BC) {
- unsigned CRBit = I->getOperand(0).getReg();
+ Register CRBit = I->getOperand(0).getReg();
BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BCn) {
- unsigned CRBit = I->getOperand(0).getReg();
+ Register CRBit = I->getOperand(0).getReg();
BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ) {
BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 264d6b590f95..d8425d89da92 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -162,7 +162,7 @@ class PPCFastISel final : public FastISel {
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg,
const PPC::Predicate Pred);
- bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
const TargetRegisterClass *RC, bool IsZExt = true,
unsigned FP64LoadOpc = PPC::LFD);
bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
@@ -451,7 +451,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
-bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
const TargetRegisterClass *RC,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
@@ -469,7 +469,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
(ResultReg ? MRI.getRegClass(ResultReg) :
(RC ? RC :
(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
- (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) :
+ (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass)))));
@@ -612,7 +612,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
@@ -989,7 +989,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
- DestReg = createResultReg(&PPC::SPE4RCRegClass);
+ DestReg = createResultReg(&PPC::GPRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
@@ -1051,7 +1051,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
return 0;
@@ -1176,7 +1176,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
- unsigned ResultReg = 0;
+ Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
return 0;
@@ -1229,9 +1229,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
if (IsSigned)
- Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
+ Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
- Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+ Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
if (DstVT == MVT::i32)
@@ -1717,7 +1717,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
CCValAssign &VA = ValLocs[0];
- unsigned RetReg = VA.getLocReg();
+ Register RetReg = VA.getLocReg();
// We still need to worry about properly extending the sign. For example,
// we could have only a single bit or a constant that needs zero
// extension rather than sign extension. Make sure we pass the return
@@ -2002,7 +2002,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
const bool HasSPE = PPCSubTarget->hasSPE();
const TargetRegisterClass *RC;
if (HasSPE)
- RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass);
+ RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
else
RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
@@ -2031,8 +2031,8 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
- // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
@@ -2085,16 +2085,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
- // LDtocL(GV, ADDIStocHA(%x2, GV))
+ // LDtocL(GV, ADDIStocHA8(%x2, GV))
// Otherwise we generate:
- // ADDItocL(ADDIStocHA(%x2, GV), GV)
- // Either way, start with the ADDIStocHA:
+ // ADDItocL(ADDIStocHA8(%x2, GV), GV)
+ // Either way, start with the ADDIStocHA8:
unsigned HighPartReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
- if (GVFlags & PPCII::MO_NLP_FLAG) {
+ if (PPCSubTarget->isGVIndirectSymbol(GV)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
} else {
@@ -2353,7 +2352,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!PPCComputeAddress(LI->getOperand(0), Addr))
return false;
- unsigned ResultReg = MI->getOperand(0).getReg();
+ Register ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
@@ -2464,7 +2463,7 @@ namespace llvm {
const TargetLibraryInfo *LibInfo) {
// Only available on 64-bit ELF for now.
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
+ if (Subtarget.is64BitELFABI())
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
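The indirect-versus-direct split in PPCMaterializeGV corresponds to two standard ppc64 TOC sequences (registers and labels hypothetical):

  # Indirect: external/interposable GV, or large code model
  addis 3, 2, .LC0@toc@ha
  ld    3, .LC0@toc@l(3)      # load the address out of the TOC
  # Direct: local, non-interposable GV
  addis 3, 2, gv@toc@ha
  addi  3, 3, gv@toc@l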
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index ebfb1ef7f49b..06a4d183e781 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -47,13 +47,15 @@ static const MCPhysReg VRRegNo[] = {
};
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI())
+ if (STI.isDarwinABI() || STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
// SVR4 ABI:
return STI.isPPC64() ? 16 : 4;
}
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
+ if (STI.isAIXABI())
+ return STI.isPPC64() ? 40 : 20;
return STI.isELFv2ABI() ? 24 : 40;
}
@@ -88,6 +90,11 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
: STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
}
+static unsigned computeCRSaveOffset() {
+ // The condition register save offset needs to be updated for AIX PPC32.
+ return 8;
+}
+
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
STI.getPlatformStackAlignment(), 0),
@@ -95,7 +102,8 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
LinkageSize(computeLinkageSize(Subtarget)),
- BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
+ BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
+ CRSaveOffset(computeCRSaveOffset()) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
@@ -370,8 +378,8 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
return;
}
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
if (DstReg != SrcReg)
@@ -781,15 +789,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
- "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+ assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
+ "Unsupported PPC ABI.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ if (isAIXABI)
+ report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
HandleVRSaveUpdate(*MBBI, TII);
break;
}
@@ -819,7 +830,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool HasRedZone = isPPC64 || !isSVR4ABI;
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = RegInfo->getBaseRegister(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
@@ -908,6 +919,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ if (MustSaveCR && isAIXABI)
+ report_fatal_error("Prologue CR saving is unimplemented on AIX.");
+
// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
@@ -966,7 +980,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MIB.addReg(MustSaveCRs[i], CrState);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
}
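// Editor's note: in assembly terms, the block above emits roughly (a sketch,
// assuming 64-bit mode, where getCRSaveOffset() returns 8):
//   mfocrf r12, ...        ; gather the CR field(s) into TempReg
//   stw    r12, 8(r1)      ; STW8 into the CR save slot of the linkage area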
@@ -1020,7 +1034,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert(HasRedZone && "A red zone is always available on PPC64");
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
}
@@ -1324,7 +1338,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -1387,7 +1401,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = RegInfo->getBaseRegister(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -1590,7 +1604,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// is live here.
assert(HasRedZone && "Expecting red zone");
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
@@ -1614,7 +1628,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(isPPC64 && "Expecting 64-bit mode");
assert(RBReg == SPReg && "Should be using SP as a base register");
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(8)
+ .addImm(getCRSaveOffset())
.addReg(RBReg);
}
@@ -1762,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
- bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
+ const bool isPPC64 = Subtarget.isPPC64();
+ const bool IsDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1812,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
- if (!isPPC64 && !isDarwinABI &&
+ if (!isPPC64 && !IsDarwinABI &&
(SavedRegs.test(PPC::CR2) ||
SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
@@ -1872,8 +1886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
(Reg != PPC::X2 && Reg != PPC::R2)) &&
"Not expecting to try to spill R2 in a function that must save TOC");
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::SPE4RCRegClass.contains(Reg)) {
+ if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -1967,7 +1980,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
assert(FI && "No Base Pointer Save Slot!");
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
- unsigned BP = RegInfo->getBaseRegister(MF);
+ Register BP = RegInfo->getBaseRegister(MF);
if (PPC::G8RCRegClass.contains(BP)) {
MinG8R = std::min<unsigned>(MinG8R, BP);
HasG8SaveArea = true;
@@ -2428,6 +2441,26 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+unsigned PPCFrameLowering::getTOCSaveOffset() const {
+ if (Subtarget.isAIXABI())
+    // TOC save/restore is normally handled by the linker; only indirect
+    // calls should hit this limitation.
+ report_fatal_error("TOC save is not implemented on AIX yet.");
+ return TOCSaveOffset;
+}
+
+unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("FramePointer is not implemented on AIX yet.");
+ return FramePointerSaveOffset;
+}
+
+unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("BasePointer is not implemented on AIX yet.");
+ return BasePointerSaveOffset;
+}
+
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
return false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d116e9fd22e1..a5fbc9acbb28 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -26,6 +26,7 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned FramePointerSaveOffset;
const unsigned LinkageSize;
const unsigned BasePointerSaveOffset;
+ const unsigned CRSaveOffset;
/**
* Find register[s] that can be used in function prologue and epilogue
@@ -142,15 +143,19 @@ public:
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- unsigned getTOCSaveOffset() const { return TOCSaveOffset; }
+ unsigned getTOCSaveOffset() const;
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; }
+ unsigned getFramePointerSaveOffset() const;
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; }
+ unsigned getBasePointerSaveOffset() const;
+
+ /// getCRSaveOffset - Return the previous frame offset to save the
+ /// CR register.
+ unsigned getCRSaveOffset() const { return CRSaveOffset; }
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 543cac075f55..4ad6c88233fe 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -371,7 +371,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// by the scheduler. Detect them now.
bool HasVectorVReg = false;
for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
HasVectorVReg = true;
break;
@@ -391,8 +391,8 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Create two vregs - one to hold the VRSAVE register that is live-in to the
// function and one for the value after having bits or'd into it.
- unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
@@ -447,7 +447,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
} else {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
- unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
@@ -5065,52 +5065,95 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case PPCISD::TOC_ENTRY: {
- assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
- "Only supported for 64-bit ABI and 32-bit SVR4");
- if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
- SDValue GA = N->getOperand(0);
- SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
- transferMemOperands(N, MN);
- ReplaceNode(N, MN);
- return;
- }
+ const bool isPPC64 = PPCSubTarget->isPPC64();
+ const bool isELFABI = PPCSubTarget->isSVR4ABI();
+ const bool isAIXABI = PPCSubTarget->isAIXABI();
+
+ assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct");
+
+    // PowerPC only supports the small, medium, and large code models.
+ const CodeModel::Model CModel = TM.getCodeModel();
+ assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
+ "PowerPC doesn't support tiny or kernel code models.");
- // For medium and large code model, we generate two instructions as
- // described below. Otherwise we allow SelectCodeCommon to handle this,
+ if (isAIXABI && CModel == CodeModel::Medium)
+ report_fatal_error("Medium code model is not supported on AIX.");
+
+ // For 64-bit small code model, we allow SelectCodeCommon to handle this,
// selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
- CodeModel::Model CModel = TM.getCodeModel();
- if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ if (isPPC64 && CModel == CodeModel::Small)
break;
- // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
- // If it must be toc-referenced according to PPCSubTarget, we generate:
- // LDtocL(@sym, ADDIStocHA(%x2, @sym))
+ // Handle 32-bit small code model.
+ if (!isPPC64) {
+      // Transforms the ISD::TOC_ENTRY node to a PPC::LWZtoc machine node.
+ auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) {
+ SDValue GA = TocEntry->getOperand(0);
+ SDValue TocBase = TocEntry->getOperand(1);
+ SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ TocBase);
+ transferMemOperands(TocEntry, MN);
+ ReplaceNode(TocEntry, MN);
+ };
+
+ if (isELFABI) {
+ assert(TM.isPositionIndependent() &&
+ "32-bit ELF can only have TOC entries in position independent"
+ " code.");
+        // 32-bit ELF always uses small code model TOC accesses.
+ replaceWithLWZtoc(N);
+ return;
+ }
+
+ if (isAIXABI && CModel == CodeModel::Small) {
+ replaceWithLWZtoc(N);
+ return;
+ }
+ }
+
+ assert(CModel != CodeModel::Small && "All small code models handled.");
+
+    assert((isPPC64 || isAIXABI) &&
+           "Expecting 64-bit ELF/AIX or 32-bit AIX code at this point.");
+
+    // Transforms the ISD::TOC_ENTRY node for the 32-bit AIX large code model
+    // and for the 64-bit medium (ELF-only) and large (ELF and AIX) code
+    // models. We generate two instructions as described below. The first
+    // source operand is a symbol reference. If it must be toc-referenced
+    // according to PPCSubTarget, we generate:
+ // [32-bit AIX]
+ // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
+ // [64-bit ELF/AIX]
+ // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
// Otherwise we generate:
- // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
+ // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
- SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDNode *Tmp = CurDAG->getMachineNode(
+ isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
+
if (PPCLowering->isAccessedAsGotIndirect(GA)) {
- // If it is access as got-indirect, we need an extra LD to load
+ // If it is accessed as got-indirect, we need an extra LWZ/LD to load
// the address.
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ SDNode *MN = CurDAG->getMachineNode(
+ isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
+
transferMemOperands(N, MN);
ReplaceNode(N, MN);
return;
}
- // Build the address relative to the TOC-pointer..
+ // Build the address relative to the TOC-pointer.
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA));
return;
}
case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
- assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
- "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ assert(PPCSubTarget->is32BitELFABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
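// Editor's note: an illustrative summary (not from this patch) of the code
// the TOC_ENTRY selection above produces for a symbol @sym:
//   32-bit small (ELF PIC / AIX):  one GOT/TOC-base-relative load (LWZtoc)
//   64-bit small:                  one ld via SelectCodeCommon
//                                  (LDtoc/LDtocJTI/LDtocCPT/LDtocBA)
//   medium/large, direct:          addis 3, 2, sym@toc@ha
//                                  addi  3, 3, sym@toc@l      ; ADDItocL
//   medium/large, got-indirect:    addis 3, 2, sym@got@toc@ha
//                                  l{wz|d} 3, sym@got@toc@l(3)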
@@ -6456,7 +6499,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
continue;
if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA)
+ HBase.getMachineOpcode() != PPC::ADDIStocHA8)
continue;
if (!Base.hasOneUse() || !HBase.hasOneUse())
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 24d50074860d..8cf6a660b08b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -139,13 +139,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
- setMinStackArgumentAlignment(isPPC64 ? 8:4);
+ setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
if (hasSPE()) {
- addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
+ addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
@@ -431,28 +431,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget.isSVR4ABI()) {
- if (isPPC64) {
- // VAARG always uses double-word chunks, so promote anything smaller.
- setOperationAction(ISD::VAARG, MVT::i1, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i8, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i16, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::i32, Promote);
- AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- } else {
- // VAARG is custom lowered with the 32-bit SVR4 ABI.
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i64, Custom);
- }
+ if (Subtarget.is64BitELFABI()) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else if (Subtarget.is32BitELFABI()) {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget.isSVR4ABI() && !isPPC64)
- // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ if (Subtarget.is32BitELFABI())
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
@@ -553,17 +551,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// For v2i64, these are only valid with P8Vector. This is corrected after
// the loop.
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
+ if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+      } else {
+ setOperationAction(ISD::SMAX, VT, Expand);
+ setOperationAction(ISD::SMIN, VT, Expand);
+ setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::UMIN, VT, Expand);
+ }
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM, VT, Legal);
@@ -646,7 +652,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
- for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
@@ -944,7 +950,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
- setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
@@ -1118,6 +1123,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+
if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
@@ -1172,9 +1179,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setJumpIsExpensive();
}
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(Align(4));
if (Subtarget.isDarwin())
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(Align(16));
switch (Subtarget.getDarwinDirective()) {
default: break;
@@ -1191,8 +1198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
- setPrefFunctionAlignment(4);
- setPrefLoopAlignment(4);
+ setPrefLoopAlignment(Align(16));
+ setPrefFunctionAlignment(Align(16));
break;
}
@@ -1352,6 +1359,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
+ case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
+ case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -1396,7 +1405,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
- case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
+ case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
+ case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
}
return nullptr;
}
@@ -1517,7 +1527,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
const PPCSubtarget& Subtarget =
- static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
if (!Subtarget.hasP8Vector())
return false;
@@ -1769,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
-/// VSPLTB/VSPLTH/VSPLTW.
+/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- (EltSize == 1 || EltSize == 2 || EltSize == 4));
+ assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
+ EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
@@ -2065,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
}
-/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
-/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
- SelectionDAG &DAG) {
+/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+/// appropriate for PPC mnemonics (which have a big endian bias - namely
+/// elements are counted from the left of the vector register).
+unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
if (DAG.getDataLayout().isLittleEndian())
@@ -2667,12 +2678,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) {
setUsesTOCBasePtr(DAG.getMachineFunction());
}
-static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
- SDValue GA) {
+SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue GA) const {
+ const bool Is64Bit = Subtarget.isPPC64();
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
- SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
- DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
-
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
+ : Subtarget.isAIXABI()
+ ? DAG.getRegister(PPC::R2, VT)
+ : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(
PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
@@ -2688,10 +2701,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return getTOCEntry(DAG, SDLoc(CP), true, GA);
+ return getTOCEntry(DAG, SDLoc(CP), GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2701,7 +2714,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, SDLoc(CP), false, GA);
+ return getTOCEntry(DAG, SDLoc(CP), GA);
}
SDValue CPIHi =
@@ -2764,10 +2777,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return getTOCEntry(DAG, SDLoc(JT), true, GA);
+ return getTOCEntry(DAG, SDLoc(JT), GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2777,7 +2790,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, SDLoc(GA), false, GA);
+ return getTOCEntry(DAG, SDLoc(GA), GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -2793,14 +2806,18 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() &&
- (Subtarget.isPPC64() || isPositionIndependent())) {
- if (Subtarget.isPPC64())
- setUsesTOCBasePtr(DAG);
+ if (Subtarget.is64BitELFABI()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
+ return getTOCEntry(DAG, SDLoc(BASDN), GA);
}
+ // 32-bit position-independent ELF stores the BlockAddress in the .got.
+ if (Subtarget.is32BitELFABI() && isPositionIndependent())
+ return getTOCEntry(
+ DAG, SDLoc(BASDN),
+ DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
+
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
@@ -2913,12 +2930,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
- return getTOCEntry(DAG, DL, true, GA);
+ return getTOCEntry(DAG, DL, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2929,7 +2946,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
- return getTOCEntry(DAG, DL, false, GA);
+ return getTOCEntry(DAG, DL, GA);
}
SDValue GAHi =
@@ -3235,8 +3252,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV, nextOffset));
}
-/// FPR - The set of FP registers that should be allocated for arguments,
-/// on Darwin.
+/// FPR - The set of FP registers that should be allocated for arguments
+/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
@@ -3377,17 +3394,17 @@ SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- if (Subtarget.isSVR4ABI()) {
- if (Subtarget.isPPC64())
- return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- else
- return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- } else {
- return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
- }
+ if (Subtarget.is64BitELFABI())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+ else if (Subtarget.is32BitELFABI())
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
+
+  // FIXME: We are using this for both AIX and Darwin. We should add
+  // AIX-specific testing and rename this function appropriately.
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
}
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
@@ -3467,7 +3484,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
if (Subtarget.hasP8Vector())
RC = &PPC::VSSRCRegClass;
else if (Subtarget.hasSPE())
- RC = &PPC::SPE4RCRegClass;
+ RC = &PPC::GPRCRegClass;
else
RC = &PPC::F4RCRegClass;
break;
@@ -4516,7 +4533,7 @@ callsShareTOCBase(const Function *Caller, SDValue Callee,
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
const SmallVectorImpl<ISD::OutputArg> &Outs) {
- assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
+ assert(Subtarget.is64BitELFABI());
const unsigned PtrByteSize = 8;
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
@@ -4926,7 +4943,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
bool isAIXABI = Subtarget.isAIXABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -4997,7 +5014,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+ if (is64BitELFv1ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -5085,7 +5102,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
+ if (is64BitELFv1ABI && !hasNest)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -6730,8 +6747,12 @@ SDValue PPCTargetLowering::LowerCall_AIX(
const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
: array_lengthof(GPR_32);
+ const unsigned NumFPRs = array_lengthof(FPR);
+  assert(NumFPRs == 13 && "Only FPRs 1-13 can be used for parameter passing "
+                          "on AIX");
+
const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
- unsigned GPR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -6768,6 +6789,20 @@ SDValue PPCTargetLowering::LowerCall_AIX(
break;
case MVT::f32:
case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+          // Args passed in FPRs also consume 1 available GPR, or 2 for an
+          // f64 in 32-bit mode.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
+ ++GPR_idx;
+ } else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
@@ -8152,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
Op0.getOperand(1));
}
+static const SDValue *getNormalLoadInput(const SDValue &Op) {
+ const SDValue *InputLoad = &Op;
+ if (InputLoad->getOpcode() == ISD::BITCAST)
+ InputLoad = &InputLoad->getOperand(0);
+ if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ InputLoad = &InputLoad->getOperand(0);
+ if (InputLoad->getOpcode() != ISD::LOAD)
+ return nullptr;
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+ return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
+}
+
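// Editor's note: an illustrative example (not part of this patch) of what
// getNormalLoadInput peels. Given a DAG such as
//   t1: f64 = load t0, ptr
//   t2: v2f64 = scalar_to_vector t1
//   t3: v16i8 = bitcast t2
// getNormalLoadInput(t3) walks through the BITCAST and SCALAR_TO_VECTOR
// wrappers and returns a pointer to the underlying value t1, or nullptr if
// the chain ends in anything other than a normal (unindexed) load.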
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8274,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
SplatBitSize > 32) {
+
+ const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
+ // Handle load-and-splat patterns as we have instructions that will do this
+ // in one go.
+ if (InputLoad && DAG.isSplatValue(Op, true)) {
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+
+ // We have handling for 4 and 8 byte elements.
+ unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+
+      // To check that the load is used only by this BUILD_VECTOR, we check
+      // for vector width (128 bits) / ElementSize uses (since each operand
+      // of the BUILD_VECTOR is a separate use of the value).
+ if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
+ ((Subtarget.hasVSX() && ElementSize == 64) ||
+ (Subtarget.hasP9Vector() && ElementSize == 32))) {
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
+ DAG.getVTList(Op.getValueType(), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ }
+ }
+
// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
// lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
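// Editor's note: the single instructions referred to above are, for example
// on little-endian POWER9 (an illustrative sketch):
//   lxvdsx vs34, 0, r3   ; load doubleword and splat (8-byte elements, VSX)
//   lxvwsx vs34, 0, r3   ; load word and splat (4-byte elements, ISA 3.0)
// which is why the 4-byte element case above also requires hasP9Vector().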
@@ -8759,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned ShiftElts, InsertAtByte;
bool Swap = false;
+
+ // If this is a load-and-splat, we can do that with a single instruction
+ // in some cases. However if the load has multiple uses, we don't want to
+ // combine it because that will just produce multiple loads.
+ const SDValue *InputLoad = getNormalLoadInput(V1);
+ if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
+ (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
+ InputLoad->hasOneUse()) {
+ bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
+ int SplatIdx =
+ PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
+
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+ // For 4-byte load-and-splat, we need Power9.
+ if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
+ uint64_t Offset = 0;
+ if (IsFourByte)
+ Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
+ else
+ Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
+ SDValue BasePtr = LD->getBasePtr();
+ if (Offset != 0)
+ BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ BasePtr, DAG.getIntPtrConstant(Offset, dl));
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ BasePtr, // BasePtr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ SDVTList VTL =
+ DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
+ SDValue LdSplt =
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ if (LdSplt.getValueType() != SVOp->getValueType(0))
+ LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
+ return LdSplt;
+ }
+ }
if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {
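// Editor's note: a worked example of the offset computation above. For a
// little-endian v4f32 splat of element 1, SplatIdx counts elements from the
// left of the register (the big-endian bias of the PPC mnemonics), so the
// splatted element lives at (3 - 1) * 4 = 8 bytes past the load's base
// pointer; that offset is folded into BasePtr before emitting LD_SPLAT.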
@@ -8835,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
- int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+ int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
@@ -9880,6 +9994,30 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
switch (Op0.getOpcode()) {
default:
return SDValue();
+ case ISD::EXTRACT_SUBVECTOR: {
+ assert(Op0.getNumOperands() == 2 &&
+ isa<ConstantSDNode>(Op0->getOperand(1)) &&
+ "Node should have 2 operands with second one being a constant!");
+
+ if (Op0.getOperand(0).getValueType() != MVT::v4f32)
+ return SDValue();
+
+ // Custom lower is only done for high or low doubleword.
+ int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ if (Idx % 2 != 0)
+ return SDValue();
+
+ // Since input is v4f32, at this point Idx is either 0 or 2.
+ // Shift to get the doubleword position we want.
+ int DWord = Idx >> 1;
+
+ // High and low word positions are different on little endian.
+ if (Subtarget.isLittleEndian())
+ DWord ^= 0x1;
+
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
+ Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
+ }
case ISD::FADD:
case ISD::FMUL:
case ISD::FSUB: {
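// Editor's note: a worked example for the EXTRACT_SUBVECTOR case above.
// Extracting the v2f32 subvector at index 2 of a v4f32 gives Idx = 2, so
// DWord = 2 >> 1 = 1; on little-endian targets the doubleword numbering is
// flipped, so DWord ^= 0x1 yields 0, and FP_EXTEND_HALF then extends that
// doubleword of the input to v2f64.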
@@ -9891,26 +10029,25 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Generate new load node.
LoadSDNode *LD = cast<LoadSDNode>(LdOp);
- SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
- NewLoad[i] =
- DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
- DAG.getVTList(MVT::v4f32, MVT::Other),
- LoadOps, LD->getMemoryVT(),
- LD->getMemOperand());
- }
- SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
- NewLoad[0], NewLoad[1],
- Op0.getNode()->getFlags());
- return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+ SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
+ NewLoad[i] = DAG.getMemIntrinsicNode(
+ PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
+ LD->getMemoryVT(), LD->getMemOperand());
+ }
+ SDValue NewOp =
+ DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
+ NewLoad[1], Op0.getNode()->getFlags());
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
+ DAG.getConstant(0, dl, MVT::i32));
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op0);
- SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
- SDValue NewLd =
- DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
- DAG.getVTList(MVT::v4f32, MVT::Other),
- LoadOps, LD->getMemoryVT(), LD->getMemOperand());
- return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+ SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
+ SDValue NewLd = DAG.getMemIntrinsicNode(
+ PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
+ LD->getMemoryVT(), LD->getMemOperand());
+ return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
+ DAG.getConstant(0, dl, MVT::i32));
}
}
llvm_unreachable("ERROR:Should return for all cases within swtich.");
@@ -10048,9 +10185,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::TRUNCATE: {
EVT TrgVT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
if (TrgVT.isVector() &&
isOperationCustom(N->getOpcode(), TrgVT) &&
- N->getOperand(0).getValueType().getSizeInBits() <= 128)
+ OpVT.getSizeInBits() <= 128 &&
+ isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
return;
}
@@ -10192,7 +10331,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
if (CmpOpcode) {
// Signed comparisons of byte or halfword values must be sign-extended.
if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
- unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
ExtReg).addReg(dest);
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
@@ -10243,10 +10382,10 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned incr = MI.getOperand(3).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register incr = MI.getOperand(3).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -10364,7 +10503,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
- unsigned SReg = RegInfo.createVirtualRegister(GPRC);
+ Register SReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::AND), SReg)
.addReg(TmpDestReg)
.addReg(MaskReg);
@@ -10375,7 +10514,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
.addReg(SReg)
.addReg(ShiftReg);
- unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
+ Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
.addReg(ValueReg);
ValueReg = ValueSReg;
@@ -10426,11 +10565,11 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
- unsigned mainDstReg = MRI.createVirtualRegister(RC);
- unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+ Register mainDstReg = MRI.createVirtualRegister(RC);
+ Register restoreDstReg = MRI.createVirtualRegister(RC);
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
@@ -10482,10 +10621,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
- unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
- unsigned BufReg = MI.getOperand(1).getReg();
+ Register LabelReg = MRI.createVirtualRegister(PtrRC);
+ Register BufReg = MI.getOperand(1).getReg();
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
@@ -10570,7 +10709,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned Tmp = MRI.createVirtualRegister(RC);
+ Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
@@ -10587,7 +10726,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
- unsigned BufReg = MI.getOperand(0).getReg();
+ Register BufReg = MI.getOperand(0).getReg();
// Reload FP (the jumped-to function may not have had a
// frame pointer, and if so, then its r31 will be restored
@@ -10662,7 +10801,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
- if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ if (Subtarget.is64BitELFABI() &&
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
@@ -10828,15 +10967,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = readMBB;
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
- unsigned LoReg = MI.getOperand(0).getReg();
- unsigned HiReg = MI.getOperand(1).getReg();
+ Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ Register LoReg = MI.getOperand(0).getReg();
+ Register HiReg = MI.getOperand(1).getReg();
BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
- unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
.addReg(HiReg)
@@ -10978,11 +11117,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
StoreMnemonic = PPC::STDCX;
break;
}
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned oldval = MI.getOperand(3).getReg();
- unsigned newval = MI.getOperand(4).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register oldval = MI.getOperand(3).getReg();
+ Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -11057,11 +11196,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned oldval = MI.getOperand(3).getReg();
- unsigned newval = MI.getOperand(4).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register oldval = MI.getOperand(3).getReg();
+ Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -11238,13 +11377,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
// is not modeled at the SelectionDAG level.
- unsigned Dest = MI.getOperand(0).getReg();
- unsigned Src1 = MI.getOperand(1).getReg();
- unsigned Src2 = MI.getOperand(2).getReg();
+ Register Dest = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
@@ -11270,7 +11409,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned Dest = RegInfo.createVirtualRegister(
+ Register Dest = RegInfo.createVirtualRegister(
Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc dl = MI.getDebugLoc();
@@ -11283,7 +11422,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
@@ -11297,7 +11436,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(PPC::CR0EQ);
} else if (MI.getOpcode() == PPC::SETRNDi) {
DebugLoc dl = MI.getDebugLoc();
- unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
@@ -11378,7 +11517,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
};
- unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+ Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
@@ -11393,12 +11532,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// mtfsf 255, NewFPSCRReg
MachineOperand SrcOp = MI.getOperand(1);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
- unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
- unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
// The first operand of INSERT_SUBREG should be a register which has
// subregisters, we only care about its RegClass, so we should use an
@@ -11409,14 +11548,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.add(SrcOp)
.addImm(1);
- unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
.addReg(OldFPSCRTmpReg)
.addReg(ExtSrcReg)
.addImm(0)
.addImm(62);
- unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
// The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
@@ -13113,6 +13252,61 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
return Val;
}
+SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
+ LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const {
+ assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
+ "Not a reverse memop pattern!");
+
+ auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
+ auto Mask = SVN->getMask();
+ int i = 0;
+ auto I = Mask.rbegin();
+ auto E = Mask.rend();
+
+ for (; I != E; ++I) {
+ if (*I != i)
+ return false;
+ i++;
+ }
+ return true;
+ };
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = SVN->getValueType(0);
+
+ if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+ return SDValue();
+
+  // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order
+  // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts
+  // with that optimization, so we only perform it on Power9 and later.
+ if (!Subtarget.hasP9Vector())
+ return SDValue();
+
+  if (!IsElementReverse(SVN))
+ return SDValue();
+
+ if (LSBase->getOpcode() == ISD::LOAD) {
+ SDLoc dl(SVN);
+ SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ if (LSBase->getOpcode() == ISD::STORE) {
+ SDLoc dl(LSBase);
+ SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
+ LSBase->getBasePtr()};
+ return DAG.getMemIntrinsicNode(
+ PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
+ LSBase->getMemoryVT(), LSBase->getMemOperand());
+ }
+
+ llvm_unreachable("Expected a load or store node here");
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -13159,6 +13353,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
+ case ISD::VECTOR_SHUFFLE:
+ if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
+    LSBaseSDNode *LSBase = cast<LSBaseSDNode>(N->getOperand(0));
+ return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
+ }
+ break;
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
@@ -13170,6 +13370,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return Val;
}
+ if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
+      SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
+ if (Val)
+ return Val;
+ }
+
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
@@ -13903,7 +14110,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
-unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
switch (Subtarget.getDarwinDirective()) {
default: break;
case PPC::DIR_970:
@@ -13924,7 +14131,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
// Actual alignment of the loop will depend on the hotness check and other
// logic in alignBlocks.
if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
- return 5;
+ return Align(32);
}
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
@@ -13940,7 +14147,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
}
if (LoopSize > 16 && LoopSize <= 32)
- return 5;
+ return Align(32);
break;
}
@@ -14063,7 +14270,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'f':
if (Subtarget.hasSPE()) {
if (VT == MVT::f32 || VT == MVT::i32)
- return std::make_pair(0U, &PPC::SPE4RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::SPERCRegClass);
} else {
@@ -14306,22 +14513,22 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const {
+Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
+ bool IsDarwinABI = Subtarget.isDarwinABI();
if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
(!isPPC64 && VT != MVT::i32))
report_fatal_error("Invalid register global variable type");
bool is64Bit = isPPC64 && VT == MVT::i64;
- unsigned Reg = StringSwitch<unsigned>(RegName)
+ Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
- .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
+ .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() :
(is64Bit ? PPC::X13 : PPC::R13))
- .Default(0);
+ .Default(Register());
if (Reg)
return Reg;
@@ -14330,14 +14537,17 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ if (Subtarget.is32BitELFABI())
+ return true;
+
+ // AIX accesses everything indirectly through the TOC, which is similar to
+ // the GOT.
+ if (Subtarget.isAIXABI())
return true;
CodeModel::Model CModel = getTargetMachine().getCodeModel();
// If it is small or large code model, module locals are accessed
- // indirectly by loading their address from .toc/.got. The difference
- // is that for large code model we have ADDISTocHa + LDtocL and for
- // small code model we simply have LDtoc.
+ // indirectly by loading their address from .toc/.got.
if (CModel == CodeModel::Small || CModel == CodeModel::Large)
return true;
@@ -14345,14 +14555,8 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
return true;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GV = G->getGlobal();
- unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
- // The NLP flag indicates that a global access has to use an
- // extra indirection.
- if (GVFlags & PPCII::MO_NLP_FLAG)
- return true;
- }
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
+ return Subtarget.isGVIndirectSymbol(G->getGlobal());
return false;
}
@@ -14417,7 +14621,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -14451,7 +14655,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -14503,7 +14707,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -14536,7 +14740,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = 1;
+ Info.align = Align::None();
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -14786,7 +14990,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
@@ -15146,7 +15350,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
- if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
+ if (!Subtarget.is64BitELFABI())
return false;
// If not a tail call then no need to proceed.
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 97422c6eda36..62922ea2d4c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -412,8 +412,9 @@ namespace llvm {
/// representation.
QBFLT,
- /// Custom extend v4f32 to v2f64.
- FP_EXTEND_LH,
+ /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
+ /// lower (IDX=1) half of v4f32 to v2f64.
+ FP_EXTEND_HALF,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
@@ -456,15 +457,29 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+ /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
+ /// the vector type, loading the vector in big-endian element order.
+ LOAD_VEC_BE,
+
/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,
+ /// VSRC, CHAIN = LD_SPLAT CHAIN, Ptr - a splatting load memory
+ /// instruction such as LXVDSX, LXVWSX.
+ LD_SPLAT,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
STXVD2X,
+ /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
+ /// the vector type, storing the vector in big-endian element order.
+ STORE_VEC_BE,
+
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
@@ -563,9 +578,11 @@ namespace llvm {
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE);
- /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
- /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
+ /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+ /// appropriate for PPC mnemonics (which have a big endian bias - namely
+ /// elements are counted from the left of the vector register).
+ unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -716,8 +733,8 @@ namespace llvm {
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
- unsigned getRegisterByName(const char* RegName, EVT VT,
- SelectionDAG &DAG) const override;
+ Register getRegisterByName(const char* RegName, EVT VT,
+ const MachineFunction &MF) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
@@ -725,7 +742,7 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+ Align getPrefLoopAlignment(MachineLoop *ML) const override;
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return true;
@@ -834,6 +851,18 @@ namespace llvm {
return true;
}
+ bool isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const override {
+ // Only handle float load/store pairs, because float (FPR) load/store
+ // instructions take more cycles than integer (GPR) load/store on PPC.
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
+ return false;
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+
+ return true;
+ }
+
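+ // A sketch of what the hook above enables: a float that is merely copied
+ // through memory, e.g.
+ //   %f = load float, float* %p
+ //   store float %f, float* %q
+ // can be moved through a GPR (lwz/stw) instead of an FPR (lfs/stfs),
+ // assuming no floating-point uses of %f intervene.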
// Returns true if the address of the global is stored in TOC entry.
bool isAccessedAsGotIndirect(SDValue N) const;
@@ -998,6 +1027,8 @@ namespace llvm {
SDValue &FPOpOut,
const SDLoc &dl) const;
+ SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;
+
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
@@ -1155,6 +1186,8 @@ namespace llvm {
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
+ DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index d598567f8e4e..f16187149d36 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1099,8 +1099,8 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
-def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
- "#ADDIStocHA", []>, isPPC64;
+def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDIStocHA8", []>, isPPC64;
def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 8176c5120a83..fd3fc2af2327 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -215,21 +215,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 1, *CurDAG), SDLoc(N));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 2, *CurDAG), SDLoc(N));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));
+ return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 4, *CurDAG), SDLoc(N));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+ [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in {
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
"mfvscr $vD", IIC_LdStStore,
- [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
- [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index a48eb1690695..96b9c9a119c0 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1209,20 +1209,13 @@ class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
-class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+class XX3Form_SameOp<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let XA = XT;
let XB = XT;
}
-class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let XB = XT;
- let XA = XT;
-}
-
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a787bdd56b9d..6b10672965c9 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -90,7 +90,6 @@ enum SpillOpcodeKey {
SOK_QuadBitSpill,
SOK_SpillToVSR,
SOK_SPESpill,
- SOK_SPE4Spill,
SOK_LastOpcodeSpill // This must be last on the enum.
};
@@ -184,10 +183,10 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- unsigned Reg = DefMO.getReg();
+ Register Reg = DefMO.getReg();
bool IsRegCR;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI.getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -330,11 +329,13 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::LIS8:
case PPC::QVGPCI:
case PPC::ADDIStocHA:
+ case PPC::ADDIStocHA8:
case PPC::ADDItocL:
case PPC::LOAD_STACK_GUARD:
case PPC::XXLXORz:
case PPC::XXLXORspz:
case PPC::XXLXORdpz:
+ case PPC::XXLEQVOnes:
case PPC::V_SET0B:
case PPC::V_SET0H:
case PPC::V_SET0:
@@ -448,7 +449,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return &MI;
}
-bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
// For VSX A-Type FMA instructions, it is the first two operands that can be
// commuted, however, because the non-encoded tied input operand is listed
@@ -966,11 +968,11 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
getKillRegState(KillSrc);
return;
} else if (PPC::SPERCRegClass.contains(SrcReg) &&
- PPC::SPE4RCRegClass.contains(DestReg)) {
+ PPC::GPRCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
- } else if (PPC::SPE4RCRegClass.contains(SrcReg) &&
+ } else if (PPC::GPRCRegClass.contains(SrcReg) &&
PPC::SPERCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
@@ -1009,8 +1011,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
- else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
- Opc = PPC::OR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
else
@@ -1043,8 +1043,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
@@ -1083,8 +1081,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1133,8 +1129,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
@@ -1173,8 +1167,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
- } else if (PPC::SPE4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1648,7 +1640,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
return false;
int OpC = CmpInstr.getOpcode();
- unsigned CRReg = CmpInstr.getOperand(0).getReg();
+ Register CRReg = CmpInstr.getOperand(0).getReg();
// FP record forms set CR1 based on the exception status bits, not a
// comparison with zero.
@@ -1671,7 +1663,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// Look through copies unless that gets us to a physical register.
unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
- if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+ if (Register::isVirtualRegister(ActualSrc))
SrcReg = ActualSrc;
// Get the unique definition of SrcReg.
@@ -1937,7 +1929,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// Rotates are expensive instructions. If we're emitting a record-form
// rotate that can just be an andi/andis, we should just emit that.
if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
- unsigned GPRRes = MI->getOperand(0).getReg();
+ Register GPRRes = MI->getOperand(0).getReg();
int64_t SH = MI->getOperand(2).getImm();
int64_t MB = MI->getOperand(3).getImm();
int64_t ME = MI->getOperand(4).getImm();
@@ -2122,7 +2114,7 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
llvm_unreachable("Unknown Operation!");
}
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
unsigned Opcode;
if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
(TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
@@ -2184,7 +2176,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandVSXMemPseudo(MI);
}
case PPC::SPILLTOVSR_LD: {
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(TargetReg)) {
MI.setDesc(get(PPC::DFLOADf64));
return expandPostRAPseudo(MI);
@@ -2194,7 +2186,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_ST: {
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(SrcReg)) {
NumStoreSPILLVSRRCAsVec++;
MI.setDesc(get(PPC::DFSTOREf64));
@@ -2206,7 +2198,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_LDX: {
- unsigned TargetReg = MI.getOperand(0).getReg();
+ Register TargetReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(TargetReg))
MI.setDesc(get(PPC::LXSDX));
else
@@ -2214,7 +2206,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case PPC::SPILLTOVSR_STX: {
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(0).getReg();
if (PPC::VSFRCRegClass.contains(SrcReg)) {
NumStoreSPILLVSRRCAsVec++;
MI.setDesc(get(PPC::STXSDX));
@@ -2279,10 +2271,10 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
int64_t Imm) const {
assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
// Replace the REG with the Immediate.
- unsigned InUseReg = MI.getOperand(OpNo).getReg();
+ Register InUseReg = MI.getOperand(OpNo).getReg();
MI.getOperand(OpNo).ChangeToImmediate(Imm);
- if (empty(MI.implicit_operands()))
+ if (MI.implicit_operands().empty())
return;
// We need to make sure that the MI didn't have any implicit use
@@ -2328,6 +2320,23 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
.addImm(LII.Imm);
}
+MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
+ bool &SeenIntermediateUse) const {
+ assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
+ "Should be called after register allocation.");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
+ It++;
+ SeenIntermediateUse = false;
+ for (; It != E; ++It) {
+ if (It->modifiesRegister(Reg, TRI))
+ return &*It;
+ if (It->readsRegister(Reg, TRI))
+ SeenIntermediateUse = true;
+ }
+ return nullptr;
+}
+
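+// Sketch of intended use (see getForwardingDefMI below): walk backwards from
+// MI to the nearest def of Reg in the block, reporting any intervening read
+// through SeenIntermediateUse so callers know the def cannot be deleted.
+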
MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineInstr &MI,
unsigned &OpNoForForwarding,
@@ -2342,11 +2351,11 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
continue;
- unsigned Reg = MI.getOperand(i).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MI.getOperand(i).getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
- if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
+ if (Register::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
OpNoForForwarding = i;
@@ -2370,7 +2379,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
- if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
+ bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+ if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
return nullptr;
// Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2381,29 +2393,24 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineOperand &MO = MI.getOperand(i);
SeenIntermediateUse = false;
if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
- MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
- It++;
- unsigned Reg = MI.getOperand(i).getReg();
-
- // Is this register defined by some form of add-immediate (including
- // load-immediate) within this basic block?
- for ( ; It != E; ++It) {
- if (It->modifiesRegister(Reg, &getRegisterInfo())) {
- switch (It->getOpcode()) {
- default: break;
- case PPC::LI:
- case PPC::LI8:
- case PPC::ADDItocL:
- case PPC::ADDI:
- case PPC::ADDI8:
- OpNoForForwarding = i;
- return &*It;
- }
+ Register Reg = MI.getOperand(i).getReg();
+ // If we see another use of this reg between the def and the MI,
+ // we want to flag it so the def isn't deleted.
+ MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
+ if (DefMI) {
+ // Is this register defined by some form of add-immediate (including
+ // load-immediate) within this basic block?
+ switch (DefMI->getOpcode()) {
+ default:
break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
- // If we see another use of this reg between the def and the MI,
- // we want to flat it so the def isn't deleted.
- SeenIntermediateUse = true;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::ADDItocL:
+ case PPC::ADDI:
+ case PPC::ADDI8:
+ OpNoForForwarding = i;
+ return DefMI;
+ }
}
}
}
@@ -2417,7 +2424,7 @@ const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
{PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX,
PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST, PPC::EVSTDD, PPC::SPESTW},
+ PPC::SPILLTOVSR_ST, PPC::EVSTDD},
// Power 9
{PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,
@@ -2433,7 +2440,7 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
{PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,
PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD, PPC::EVLDD, PPC::SPELWZ},
+ PPC::SPILLTOVSR_LD, PPC::EVLDD},
// Power 9
{PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32,
@@ -2538,12 +2545,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
"The forwarding operand needs to be valid at this point");
bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
- unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
+ Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
ImmInstrInfo III;
- bool HasImmForm = instrHasImmForm(MI, III, PostRA);
+ bool IsVFReg = MI.getOperand(0).isReg()
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+ bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by an add-immediate,
// try to convert it.
@@ -2591,7 +2601,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If a compare-immediate is fed by an immediate and is itself an input of
// an ISEL (the most common case) into a COPY of the correct register.
bool Changed = false;
- unsigned DefReg = MI.getOperand(0).getReg();
+ Register DefReg = MI.getOperand(0).getReg();
int64_t Comparand = MI.getOperand(2).getImm();
int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
(Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
@@ -2601,8 +2611,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
continue;
unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
- unsigned TrueReg = CompareUseMI.getOperand(1).getReg();
- unsigned FalseReg = CompareUseMI.getOperand(2).getReg();
+ Register TrueReg = CompareUseMI.getOperand(1).getReg();
+ Register FalseReg = CompareUseMI.getOperand(2).getReg();
unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
FalseReg, CRSubReg);
if (RegToCopy == PPC::NoRegister)
@@ -2777,9 +2787,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
-bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
+bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
ImmInstrInfo &III, bool PostRA) const {
- unsigned Opc = MI.getOpcode();
// The vast majority of the instructions would need their operand 2 replaced
// with an immediate when switching to the reg+imm form. A marked exception
// is the update-form loads/stores, for which a constant operand 2 would need
@@ -3111,7 +3120,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSSPX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::LXSSP;
else {
III.ImmOpcode = PPC::LFS;
@@ -3125,7 +3134,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSDX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::LXSD;
else {
III.ImmOpcode = PPC::LFD;
@@ -3143,7 +3152,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSSPX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::STXSSP;
else {
III.ImmOpcode = PPC::STFS;
@@ -3157,7 +3166,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSDX:
if (PostRA) {
- if (isVFRegister(MI.getOperand(0).getReg()))
+ if (IsVFReg)
III.ImmOpcode = PPC::STXSD;
else {
III.ImmOpcode = PPC::STFD;
@@ -3287,7 +3296,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
if (MRI.isSSA())
return false;
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Walk the instructions in reverse (MI --> DefMI) to get the last DEF of Reg.
MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3511,8 +3520,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
III.ZeroIsSpecialNew + 1;
- unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
- unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+ Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
+ Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
// If R0 is in the operand where zero is special for the new instruction,
// it is unsafe to transform if the constant operand isn't that operand.
if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
@@ -3563,16 +3572,20 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
} else {
// The 32 bit and 64 bit instructions are quite different.
if (SpecialShift32) {
- // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31).
- uint64_t SH = RightShift ? 32 - ShAmt : ShAmt;
+ // Left shifts use (N, 0, 31-N).
+ // Right shifts use (32-N, N, 31) if 0 < N < 32,
+ // or (0, 0, 31) if N == 0.
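+ //   e.g. for N == 0 a right shift degenerates to
+ //   rlwinm Rd, Rs, 0, 0, 31, which is just a register move.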
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
uint64_t MB = RightShift ? ShAmt : 0;
uint64_t ME = RightShift ? 31 : 31 - ShAmt;
replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
.addImm(ME);
} else {
- // Left shifts use (N, 63-N), right shifts use (64-N, N).
- uint64_t SH = RightShift ? 64 - ShAmt : ShAmt;
+ // Left shifts use (N, 63-N).
+ // Right shifts use (64-N, N) if 0 < N < 64,
+ // or (0, 0) if N == 0.
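+ //   e.g. for N == 0 a right shift degenerates to
+ //   rldicl Rd, Rs, 0, 0, which is just a register move.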
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
@@ -3601,8 +3614,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
if (III.ZeroIsSpecialNew) {
// If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
// need to fix up register class.
- unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
- if (TargetRegisterInfo::isVirtualRegister(RegToModify)) {
+ Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
+ if (Register::isVirtualRegister(RegToModify)) {
const TargetRegisterClass *NewRC =
MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
&PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
@@ -3747,7 +3760,7 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
return false;
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
unsigned StackOffset = MI.getOperand(1).getImm();
- unsigned StackReg = MI.getOperand(2).getReg();
+ Register StackReg = MI.getOperand(2).getReg();
if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset)
return true;
@@ -3772,7 +3785,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
switch (MI.getOpcode()) {
case PPC::COPY: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
// In both ELFv1 and v2 ABI, method parameters and the return value
// are sign- or zero-extended.
@@ -3781,7 +3794,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
// We check the ZExt/SExt flags for a method parameter.
if (MI.getParent()->getBasicBlock() ==
&MF->getFunction().getEntryBlock()) {
- unsigned VReg = MI.getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
if (MF->getRegInfo().isLiveIn(VReg))
return SignExt ? FuncInfo->isLiveInSExt(VReg) :
FuncInfo->isLiveInZExt(VReg);
@@ -3818,7 +3831,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
}
// If this is a copy from another register, we recursively check source.
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI != NULL)
@@ -3841,8 +3854,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
case PPC::XORIS8: {
// logical operation with 16-bit immediate does not change the upper bits.
// So, we track the operand register as we do for register copy.
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI != NULL)
@@ -3870,8 +3883,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
for (unsigned I = 1; I != E; I += D) {
if (MI.getOperand(I).isReg()) {
- unsigned SrcReg = MI.getOperand(I).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(I).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
@@ -3893,12 +3906,12 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg());
- unsigned SrcReg1 = MI.getOperand(1).getReg();
- unsigned SrcReg2 = MI.getOperand(2).getReg();
+ Register SrcReg1 = MI.getOperand(1).getReg();
+ Register SrcReg2 = MI.getOperand(2).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) ||
- !TargetRegisterInfo::isVirtualRegister(SrcReg2))
- return false;
+ if (!Register::isVirtualRegister(SrcReg1) ||
+ !Register::isVirtualRegister(SrcReg2))
+ return false;
const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1);
const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2);
@@ -3923,21 +3936,99 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
}
-bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const {
- MachineBasicBlock *LoopEnd = L.getBottomBlock();
- MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
- // We really "analyze" only CTR loops right now.
- if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
- IndVarInst = nullptr;
- CmpInst = &*I;
- return false;
+namespace {
+class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+ MachineInstr *Loop, *EndLoop, *LoopCount;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ int64_t TripCount;
+
+public:
+ PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
+ MachineInstr *LoopCount)
+ : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
+ MF(Loop->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()) {
+ // Inspect the Loop instruction up-front, as it may be deleted when we call
+ // createTripCountGreaterCondition.
+ if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
+ TripCount = LoopCount->getOperand(1).getImm();
+ else
+ TripCount = -1;
}
- return true;
+
+ bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+ // Only ignore the terminator.
+ return MI == EndLoop;
+ }
+
+ Optional<bool>
+ createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) override {
+ if (TripCount == -1) {
+ // The BDZ/BDZ8 we will insert also decrements the CTR by 1,
+ // so we don't need to generate anything here.
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(
+ MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
+ true));
+ return {};
+ }
+
+ return TripCount > TC;
+ }
+
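+ // For example, with a compile-time trip count of 8 and TC == 2, the hook
+ // above simply returns true (8 > 2); with a run-time trip count it instead
+ // pushes a condition on the CTR register, to be consumed as a BDZ/BDZ8.
+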
+ void setPreheader(MachineBasicBlock *NewPreheader) override {
+ // Do nothing. We want the LOOP setup instruction to stay in the *old*
+ // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
+ }
+
+ void adjustTripCount(int TripCountAdjust) override {
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (LoopCount->getOpcode() == PPC::LI8 ||
+ LoopCount->getOpcode() == PPC::LI) {
+ int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
+ LoopCount->getOperand(1).setImm(TripCount);
+ return;
+ }
+
+ // The BDZ/BDZ8 we will insert also decrements the CTR by 1,
+ // so we don't need to generate anything here.
+ }
+
+ void disposed() override {
+ Loop->eraseFromParent();
+ // Ensure the loop setup instruction is deleted too.
+ LoopCount->eraseFromParent();
+ }
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ // We really "analyze" only hardware loops right now.
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+ MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+ if (Preheader == LoopBB)
+ Preheader = *std::next(LoopBB->pred_begin());
+ MachineFunction *MF = Preheader->getParent();
+
+ if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
+ Register LoopCountReg = LoopInst->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+ return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
+ }
+ }
+ return nullptr;
}
-MachineInstr *
-PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+MachineInstr *PPCInstrInfo::findLoopInstr(
+ MachineBasicBlock &PreHeader,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
@@ -3948,50 +4039,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
return nullptr;
}
-unsigned PPCInstrInfo::reduceLoopCount(
- MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
- MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
- unsigned MaxIter) const {
- // We expect a hardware loop currently. This means that IndVar is set
- // to null, and the compare is the ENDLOOP instruction.
- assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
- MachineFunction *MF = MBB.getParent();
- DebugLoc DL = Cmp.getDebugLoc();
- MachineInstr *Loop = findLoopInstr(PreHeader);
- if (!Loop)
- return 0;
- unsigned LoopCountReg = Loop->getOperand(0).getReg();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
-
- if (!LoopCount)
- return 0;
- // If the loop trip count is a compile-time value, then just change the
- // value.
- if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
- int64_t Offset = LoopCount->getOperand(1).getImm();
- if (Offset <= 1) {
- LoopCount->eraseFromParent();
- Loop->eraseFromParent();
- return 0;
- }
- LoopCount->getOperand(1).setImm(Offset - 1);
- return Offset - 1;
- }
-
- // The loop trip count is a run-time value.
- // We need to subtract one from the trip count,
- // and insert branch later to check if we're done with the loop.
-
- // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
- // so we don't need to generate any thing here.
- Cond.push_back(MachineOperand::CreateImm(0));
- Cond.push_back(MachineOperand::CreateReg(
- Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
- return LoopCountReg;
-}
-
// Return true if we can get the base operand and byte offset of an
// instruction, and the memory width. Width is the size of memory that is
// being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
@@ -4018,8 +4065,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth(
}
bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
- const MachineInstr &MIa, const MachineInstr &MIb,
- AliasAnalysis * /*AA*/) const {
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 70fb757e8f1e..19ab30cb0908 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -248,11 +248,11 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const override;
+ AAResults *AA) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
void insertNoop(MachineBasicBlock &MBB,
@@ -370,8 +370,7 @@ public:
/// otherwise
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb,
- AliasAnalysis *AA = nullptr) const override;
+ const MachineInstr &MIb) const override;
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -439,9 +438,14 @@ public:
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
int64_t Imm) const;
- bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III,
+ bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III,
bool PostRA) const;
+ // In the PostRA phase, try to find the instruction that defines \p Reg
+ // before \p MI.
+ // \p SeenIntermediateUse is set to true if uses of \p Reg exist between
+ // the def and \p MI.
+ MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI,
+ bool &SeenIntermediateUse) const;
+
/// getRegNumForOperand - some operands use different numbering schemes
/// for the same registers. For example, a VSX instruction may have any of
/// vs0-vs63 allocated whereas an Altivec instruction could only have
@@ -481,26 +485,14 @@ public:
/// On PPC, we have two instructions used to set-up the hardware loop
/// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8)
/// instructions to indicate the end of a loop.
- MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
-
- /// Analyze the loop code to find the loop induction variable and compare used
- /// to compute the number of iterations. Currently, we analyze loop that are
- /// controlled using hardware loops. In this case, the induction variable
- /// instruction is null. For all other cases, this function returns true,
- /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will
- /// return new values when we can analyze the readonly loop \p L, otherwise,
- /// nothing got changed
- bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
- MachineInstr *&CmpInst) const override;
- /// Generate code to reduce the loop iteration by one and check if the loop
- /// is finished. Return the value/register of the new loop count. We need
- /// this function when peeling off one or more iterations of a loop. This
- /// function assumes the last iteration is peeled first.
- unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
- MachineInstr *IndVar, MachineInstr &Cmp,
- SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts,
- unsigned Iter, unsigned MaxIter) const override;
+ MachineInstr *
+ findLoopInstr(MachineBasicBlock &PreHeader,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
+
+ /// Analyze loop L, which must be a single-basic-block loop, and, if the
+ /// conditions can be understood well enough, produce a PipelinerLoopInfo
+ /// object.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c313337047f0..24183277519b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -386,7 +386,9 @@ def immZExt16 : PatLeaf<(imm), [{
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-def immAnyExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm) || isUInt<8>(Imm); }]>;
+def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
+ return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
+}]>;
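+// All-ones splats (-1 / 0xFF) are deliberately excluded; presumably they are
+// better materialized by an all-ones idiom (cf. XXLEQVOnes in this patch).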
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
@@ -577,7 +579,7 @@ def sperc : RegisterOperand<SPERC> {
def PPCRegSPE4RCAsmOperand : AsmOperandClass {
let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
}
-def spe4rc : RegisterOperand<SPE4RC> {
+def spe4rc : RegisterOperand<GPRC> {
let ParserMatchClass = PPCRegSPE4RCAsmOperand;
}
@@ -3161,7 +3163,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1
def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
[(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg),
+ "#LWZtocL",
+ [(set i32:$rD,
(PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp),
+ "#ADDIStocHA",
+ [(set i32:$rD,
+ (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>;
+
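+// A sketch of the intended medium/large code model sequence (assumed; the
+// exact assembly is produced when these pseudos are lowered): ADDIStocHA
+// computes the high-adjusted part of the TOC offset off the TOC base
+// register, and LWZtocL then loads the entry using the low part.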
// Get Global (GOT) Base Register offset, from the word immediately preceding
// the function label.
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
@@ -3177,21 +3188,21 @@ def : Pat<(srl i32:$rS, i32:$rB),
def : Pat<(shl i32:$rS, i32:$rB),
(SLW $rS, $rB)>;
-def : Pat<(zextloadi1 iaddr:$src),
+def : Pat<(i32 (zextloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(zextloadi1 xaddr:$src),
+def : Pat<(i32 (zextloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi1 iaddr:$src),
+def : Pat<(i32 (extloadi1 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(extloadi1 xaddr:$src),
+def : Pat<(i32 (extloadi1 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi8 iaddr:$src),
+def : Pat<(i32 (extloadi8 iaddr:$src)),
(LBZ iaddr:$src)>;
-def : Pat<(extloadi8 xaddr:$src),
+def : Pat<(i32 (extloadi8 xaddr:$src)),
(LBZX xaddr:$src)>;
-def : Pat<(extloadi16 iaddr:$src),
+def : Pat<(i32 (extloadi16 iaddr:$src)),
(LHZ iaddr:$src)>;
-def : Pat<(extloadi16 xaddr:$src),
+def : Pat<(i32 (extloadi16 xaddr:$src)),
(LHZX xaddr:$src)>;
let Predicates = [HasFPU] in {
def : Pat<(f64 (extloadf32 iaddr:$src)),
@@ -3564,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
(EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
-
-defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
- (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
- (LO16 imm:$imm)), sub_eq)>;
-
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
(EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
@@ -3592,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
(EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
- (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
- (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
- (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
-
// SETCC for i64.
def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
(EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
@@ -3632,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
(EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// Instantiations of CRNotPat for i32.
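+// (CRNotPat matches the inverted condition by negating the extracted CR bit;
+// e.g. SETUGE is the complement of the unsigned-less-than bit from CMPLWI.)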
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// Instantiations of CRNotPat for i64.
defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
(EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
@@ -3649,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
(EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
(LO16 imm:$imm)), sub_eq)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
- (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
- (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
- (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
-
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
(EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
@@ -3671,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+let Predicates = [HasFPU] in {
+// Instantiations of CRNotPat for f32.
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// Instantiations of CRNotPat for f64.
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// Instantiations of CRNotPat for f128.
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
+ (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+}
+
// SETCC for f32.
let Predicates = [HasFPU] in {
def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
@@ -3688,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
(EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-
// SETCC for f64.
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
(EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
@@ -3719,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
(EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
- (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-
// SETCC for f128.
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)),
(EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
@@ -3750,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)),
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)),
(EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
- (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
-
}
// This must be in this file because it relies on patterns defined in this file
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 07f38a61d098..2aad5860d87f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -58,8 +58,12 @@ def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
]>;
-def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
- SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+def SDT_PPCfpexth : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2>
+]>;
+
+def SDT_PPCldsplat : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
]>;
// Little-endian-specific nodes.
@@ -78,12 +82,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [
def SDTVabsd : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
]>;
-
+def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
+def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
+ [SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
@@ -93,9 +106,11 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
-def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -855,14 +870,14 @@ let Uses = [RM] in {
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
- def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+ def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
- def XXLXORdpz : XX3Form_SetZero<60, 154,
+ def XXLXORdpz : XX3Form_SameOp<60, 154,
(outs vsfrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set f64:$XT, (fpimm0))]>;
- def XXLXORspz : XX3Form_SetZero<60, 154,
+ def XXLXORspz : XX3Form_SameOp<60, 154,
(outs vssrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set f32:$XT, (fpimm0))]>;
@@ -996,21 +1011,21 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
}
-// Additional fnmsub patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
- (XSNMSUBADP $B, $C, $A)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
- (XSNMSUBADP $B, $C, $A)>;
+// Additional fnmsub patterns: -a*b + c == -(a*b - c)
+def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
- (XVNMSUBADP $B, $C, $A)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
- (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (XVNMSUBASP $B, $C, $A)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
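A standalone check (not part of the patch) of the algebra behind the corrected comment: under IEEE-754 round-to-nearest, negation commutes with rounding, so the rewrite is exact for a fused multiply-add, not just in real arithmetic.

    #include <cassert>
    #include <cmath>

    // fma(-a, b, c) rounds -a*b + c once; -(fma(a, b, -c)) rounds a*b - c
    // once and then flips the sign. Round-to-nearest is sign-symmetric,
    // so the two results are bit-identical, which is what lets these
    // patterns select the xsnmsub*/xvnmsub* forms.
    int main() {
      double a = 1.5, b = -2.25, c = 0.125;
      assert(std::fma(-a, b, c) == -std::fma(a, b, -c));
      return 0;
    }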
def : Pat<(v2f64 (bitconvert v4f32:$A)),
(COPY_TO_REGCLASS $A, VSRC)>;
@@ -1077,7 +1092,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
-def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
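A scalar model of what the two patterns compute, assuming big-endian element numbering (so element 0 sits in the "high" half): FP_EXTEND_HALF widens either elements {0,1} or {2,3} of a v4f32, and the merge duplicates the chosen half so that xvcvspdp converts exactly those lanes.

    #include <array>

    // half == 0 selects the high pair {v[0], v[1]} (xxmrghw), half == 1
    // the low pair {v[2], v[3]} (xxmrglw); each selected float is then
    // extended to double, as xvcvspdp does per doubleword lane.
    std::array<double, 2> fpexth(const std::array<float, 4> &v, int half) {
      const unsigned base = half ? 2 : 0;
      return {double(v[base]), double(v[base + 1])};
    }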
// Loads.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
@@ -1088,6 +1104,19 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
(STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}
+
+// Load/store vectors in big-endian element order.
+let Predicates = [IsLittleEndian, HasVSX] in {
+ def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+}
+
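For context, a sketch of the source these patterns are assumed to serve: the vec_xl_be/vec_xst_be intrinsics read and write vector elements in big-endian order regardless of target endianness (that they lower through LOAD_VEC_BE/STORE_VEC_BE is an assumption here). PowerPC-only code, compiled with something like -maltivec -mvsx:

    #include <altivec.h>

    // On little-endian VSX, lxvd2x already loads doublewords in
    // big-endian element order, so with the patterns above a
    // big-endian-order load needs no extra swap.
    vector double load_be(const double *p) {
      return vec_xl_be(0, p); // assumed to select LXVD2X directly
    }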
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1288,6 +1317,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
(XXLEQV $A, $B)>;
+ let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+ def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
+ "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
+ [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
+ }
+
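A trivial scalar check of the identity that makes this usable as a move-immediate: xxleqv computes bitwise XNOR, and XNOR of any value with itself is all ones, so the instruction needs no defined input and can be freely rematerialized.

    #include <cassert>
    #include <cstdint>

    int main() {
      // xxleqv $XT, $XT, $XT computes ~($XT ^ $XT); a ^ a == 0 for any a,
      // so the result is all ones whatever the register held before.
      uint32_t a = 0xDEADBEEF; // arbitrary contents
      assert(static_cast<uint32_t>(~(a ^ a)) == 0xFFFFFFFFu);
      return 0;
    }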
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1476,6 +1512,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
AltVSXFMARel;
}
+ // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
+ def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+ def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+
// Single Precision Conversions (FP <-> INT)
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
@@ -1564,16 +1606,33 @@ let Predicates = [HasDirectMove] in {
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ let isCodeGenOnly = 1 in
+ def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ []>;
def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
"mtvsrd $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i64:$rA))]>,
Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ let isCodeGenOnly = 1 in
+ def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ []>;
def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+ let isCodeGenOnly = 1 in
+ def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ []>;
} // HasDirectMove
let Predicates = [IsISA3_0, HasDirectMove] in {
@@ -1597,6 +1656,22 @@ def : InstAlias<"mfvrd $rA, $XT",
(MFVRD g8rc:$rA, vrrc:$XT), 0>;
def : InstAlias<"mffprd $rA, $src",
(MFVSRD g8rc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrd $XT, $rA",
+ (MTVRD vrrc:$XT, g8rc:$rA), 0>;
+def : InstAlias<"mtfprd $dst, $rA",
+ (MTVSRD f8rc:$dst, g8rc:$rA)>;
+def : InstAlias<"mfvrwz $rA, $XT",
+ (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprwz $rA, $src",
+ (MFVSRWZ gprc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrwa $XT, $rA",
+ (MTVRWA vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwa $dst, $rA",
+ (MTVSRWA f8rc:$dst, gprc:$rA)>;
+def : InstAlias<"mtvrwz $XT, $rA",
+ (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwz $dst, $rA",
+ (MTVSRWZ f8rc:$dst, gprc:$rA)>;
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -2581,9 +2656,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
- // Additional fnmsub patterns: -a*c + b == -(a*c - b)
- def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>;
- def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>;
+ // Additional fnmsub patterns: -a*b + c == -(a*b - c)
+ def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
+ def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
//===--------------------------------------------------------------------===//
// Quad/Double-Precision Compare Instructions:
@@ -2799,12 +2874,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
[(set v4i32: $XT,
- (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
+ (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT,
- (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
+ (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
//===--------------------------------------------------------------------===//
@@ -3024,6 +3099,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+ def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+ def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+ def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3059,7 +3144,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- } // IsLittleEndian, HasP9Vector
+ } // IsBigEndian, HasP9Vector
// D-Form Load/Store
def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
@@ -3858,6 +3943,10 @@ let AddedComplexity = 400 in {
(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+ def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+ (v2f64 (LXVDSX xoaddr:$A))>;
+ def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+ (v2i64 (LXVDSX xoaddr:$A))>;
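A hedged illustration of the source shape these two patterns target, assuming LD_SPLAT is produced when a build_vector splats a value loaded from memory:

    // After vectorization to v2f64, the loaded scalar s is splatted into
    // both lanes; with the pattern above that splat can be selected as a
    // single lxvdsx load-and-splat rather than a load plus a shuffle.
    void axpy2(double *out, const double *in, const double *sp) {
      double s = *sp;
      for (int i = 0; i < 2; ++i)
        out[i] = in[i] + s;
    }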
// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
@@ -4063,27 +4152,32 @@ let AddedComplexity = 400 in {
(XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
}
+ let Predicates = [HasP8Vector] in {
+ def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
+ (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
+ (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
+ (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
+ (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+ }
+
let Predicates = [HasP9Vector] in {
// Endianness-neutral patterns for const splats with ISA 3.0 instructions.
def : Pat<(v4i32 (scalar_to_vector i32:$A)),
(v4i32 (MTVSRWS $A))>;
def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
(v4i32 (MTVSRWS $A))>;
- def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
- immAnyExt8:$A)),
+ def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
(v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
- def : Pat<(v16i8 immAllOnesV),
- (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
- def : Pat<(v8i16 immAllOnesV),
- (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
- def : Pat<(v4i32 immAllOnesV),
- (v4i32 (XXSPLTIB 255))>;
- def : Pat<(v2i64 immAllOnesV),
- (v2i64 (XXSPLTIB 255))>;
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
@@ -4102,6 +4196,10 @@ let AddedComplexity = 400 in {
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
+ def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
+ (v4f32 (LXVWSX xoaddr:$A))>;
+ def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
+ (v4i32 (LXVWSX xoaddr:$A))>;
}
let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 4d45d96d4479..d252cfbd26b1 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -63,8 +63,24 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
cl::desc("Potential PHI threshold for PPC preinc loop prep"));
STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
+STATISTIC(UpdFormChainRewritten, "Num of update-form chains rewritten");
namespace {
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
+
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
+
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
+ };
class PPCLoopPreIncPrep : public FunctionPass {
public:
@@ -85,21 +101,47 @@ namespace {
AU.addRequired<ScalarEvolutionWrapperPass>();
}
- bool alreadyPrepared(Loop *L, Instruction* MemI,
- const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV);
bool runOnFunction(Function &F) override;
- bool runOnLoop(Loop *L);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
-
private:
PPCTargetMachine *TM = nullptr;
+ const PPCSubtarget *ST;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
bool PreserveLCSSA;
+
+ bool runOnLoop(Loop *L);
+
+  /// Check whether the required PHI node already exists in Loop \p L.
+ bool alreadyPrepared(Loop *L, Instruction* MemI,
+ const SCEV *BasePtrStartSCEV,
+ const SCEVConstant *BasePtrIncSCEV);
+
+  /// Collect candidates in Loop \p L that satisfy the condition checked by
+  /// \p isValidCandidate().
+ SmallVector<Bucket, 16>
+ collectCandidates(Loop *L,
+ std::function<bool(const Instruction *, const Value *)>
+ isValidCandidate,
+ unsigned MaxCandidateNum);
+
+  /// Add a candidate to the set of candidate \p Buckets.
+ void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+ SmallVector<Bucket, 16> &Buckets,
+ unsigned MaxCandidateNum);
+
+ /// Prepare all candidates in \p Buckets for update form.
+ bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
+
+  /// Prepare one chain \p BucketChain: find the best base element and
+  /// update all other elements in \p BucketChain accordingly.
+ bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
+
+  /// Rewrite the load/store instructions in \p BucketChain according to the
+  /// preparation.
+ bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
+ SmallSet<BasicBlock *, 16> &BBChanged);
};
} // end anonymous namespace
@@ -111,30 +153,15 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
+static const std::string PHINodeNameSuffix = ".phi";
+static const std::string CastNodeNameSuffix = ".cast";
+static const std::string GEPNodeIncNameSuffix = ".inc";
+static const std::string GEPNodeOffNameSuffix = ".off";
+
FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
return new PPCLoopPreIncPrep(TM);
}
-namespace {
-
- struct BucketElement {
- BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
- BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
-
- const SCEVConstant *Offset;
- Instruction *Instr;
- };
-
- struct Bucket {
- Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
- Elements(1, BucketElement(I)) {}
-
- const SCEV *BaseSCEV;
- SmallVector<BucketElement, 16> Elements;
- };
-
-} // end anonymous namespace
-
static bool IsPtrInBounds(Value *BasePtr) {
Value *StrippedBasePtr = BasePtr;
while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
@@ -145,6 +172,14 @@ static bool IsPtrInBounds(Value *BasePtr) {
return false;
}
+static std::string getInstrName(const Value *I, const std::string Suffix) {
+  assert(I && "Invalid parameter!");
+ if (I->hasName())
+ return (I->getName() + Suffix).str();
+ else
+ return "";
+}
+
static Value *GetPointerOperand(Value *MemI) {
if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
return LMemI->getPointerOperand();
@@ -167,6 +202,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ ST = TM ? TM->getSubtargetImpl(F) : nullptr;
bool MadeChange = false;
@@ -177,10 +213,280 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
return MadeChange;
}
+void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+ SmallVector<Bucket, 16> &Buckets,
+ unsigned MaxCandidateNum) {
+ assert((MemI && GetPointerOperand(MemI)) &&
+ "Candidate should be a memory instruction.");
+ assert(LSCEV && "Invalid SCEV for Ptr value.");
+ bool FoundBucket = false;
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
+ }
+ }
+
+ if (!FoundBucket) {
+ if (Buckets.size() == MaxCandidateNum)
+ return;
+ Buckets.push_back(Bucket(LSCEV, MemI));
+ }
+}
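For intuition, a toy model of the bucketing above, with integers standing in for SCEVs: a candidate joins the first bucket whose base it differs from by a compile-time constant, and otherwise opens a new bucket until the cap is reached.

    #include <cstdint>
    #include <vector>

    // Toy stand-in for addOneCandidate. Real candidates are SCEVs, and
    // "constant difference" means SE->getMinusSCEV folds to a
    // SCEVConstant; here that is modelled as sharing a symbolic base.
    struct ToyBucket { unsigned BaseId; std::vector<int64_t> Offsets; };

    void addCandidate(unsigned BaseId, int64_t Offset,
                      std::vector<ToyBucket> &Buckets, unsigned MaxBuckets) {
      for (auto &B : Buckets)
        if (B.BaseId == BaseId) {      // difference folds to a constant
          B.Offsets.push_back(Offset);
          return;
        }
      if (Buckets.size() < MaxBuckets) // respect MaxCandidateNum
        Buckets.push_back({BaseId, {Offset}});
    }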
+
+SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates(
+ Loop *L,
+ std::function<bool(const Instruction *, const Value *)> isValidCandidate,
+ unsigned MaxCandidateNum) {
+ SmallVector<Bucket, 16> Buckets;
+ for (const auto &BB : L->blocks())
+ for (auto &J : *BB) {
+ Value *PtrValue;
+ Instruction *MemI;
+
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) {
+ MemI = LMemI;
+ PtrValue = LMemI->getPointerOperand();
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) {
+ MemI = SMemI;
+ PtrValue = SMemI->getPointerOperand();
+ } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
+ if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+ MemI = IMemI;
+ PtrValue = IMemI->getArgOperand(0);
+ } else continue;
+ } else continue;
+
+ unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+ if (PtrAddrSpace)
+ continue;
+
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LARSCEV || LARSCEV->getLoop() != L)
+ continue;
+
+ if (isValidCandidate(&J, PtrValue))
+ addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum);
+ }
+ return Buckets;
+}
+
+// TODO: implement a more clever base choosing policy.
+// Currently we always choose an existing load/store offset. This may lead to
+// suboptimal code sequences. For example, for one DS chain with offsets
+// {-32769, 2003, 2007, 2011}, we choose -32769 as the base offset, leaving
+// displacements {0, 34772, 34776, 34780} for the loads/stores. Though each
+// displacement is now a multiple of 4, it cannot be represented by sint16.
+bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!BucketChain.Elements[j].Offset ||
+ BucketChain.Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = BucketChain.Elements[j].Offset;
+ BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+ for (auto &E : BucketChain.Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
+ break;
+ }
+ return true;
+}
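A small numeric model (not part of the patch) of the rebasing done here: once element j is chosen as the base, the chain's base SCEV absorbs j's offset and every element is re-expressed relative to the new base; the std::swap that moves j to the front is omitted.

    #include <cassert>
    #include <vector>

    int main() {
      long Base = 1000;                    // BucketChain.BaseSCEV
      std::vector<long> Off = {0, 16, 32}; // element offsets from Base
      const unsigned j = 1;                // chosen base element
      const long Delta = Off[j];
      Base += Delta;                       // BaseSCEV += Offset
      for (auto &O : Off)
        O -= Delta;                        // rebase every element
      assert(Base == 1016);
      assert(Off[0] == -16 && Off[1] == 0 && Off[2] == 16);
      return 0;
    }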
+
+bool PPCLoopPreIncPrep::rewriteLoadStores(
+ Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged) {
+ bool MadeChange = false;
+ const SCEVAddRecExpr *BasePtrSCEV =
+ cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
+ if (!BasePtrSCEV->isAffine())
+ return MadeChange;
+
+ LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+
+ assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
+
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = BucketChain.Elements.begin()->Instr;
+ Value *BasePtr = GetPointerOperand(MemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+ Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
+ if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
+ return MadeChange;
+
+ const SCEVConstant *BasePtrIncSCEV =
+ dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+ if (!BasePtrIncSCEV)
+ return MadeChange;
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
+ if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+ return MadeChange;
+
+ if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
+ return MadeChange;
+
+ LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ BasicBlock *Header = L->getHeader();
+ unsigned HeaderLoopPredCount = pred_size(Header);
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ PHINode *NewPHI =
+ PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ getInstrName(MemI, PHINodeNameSuffix),
+ Header->getFirstNonPHI());
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (const auto &PI : predecessors(Header)) {
+ if (PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+ getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+ PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (const auto &PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+
+ Instruction *NewBasePtr;
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+
+ if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+ BBChanged.insert(IDel->getParent());
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+ RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(NewBasePtr);
+
+ for (auto I = std::next(BucketChain.Elements.begin()),
+ IE = BucketChain.Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
+ assert(Ptr && "No pointer operand");
+ if (NewPtrs.count(Ptr))
+ continue;
+
+ Instruction *RealNewPtr;
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = nullptr;
+ else if (PtrIP && isa<PHINode>(PtrIP))
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = I->Instr;
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, I->Offset->getValue(),
+ getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ getInstrName(Ptr, CastNodeNameSuffix));
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+ NewPtrs.insert(RealNewPtr);
+ }
+
+ MadeChange = true;
+ UpdFormChainRewritten++;
+
+ return MadeChange;
+}
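At the source level, the rewrite performed by rewriteLoadStores has roughly this shape (a hedged C-level sketch; the pass works on IR, where the start pointer is built from i8 GEPs, so the out-of-bounds-looking p - 1 below is illustrative only):

    double sum_before(const double *p, int n) {
      double s = 0;
      for (int i = 0; i < n; ++i)
        s += p[i];             // address recomputed as p + 8*i each time
      return s;
    }

    double sum_after(const double *p, int n) {
      double s = 0;
      const double *q = p - 1; // start = base - stride (the getMinusSCEV)
      for (int i = 0; i < n; ++i) {
        q += 1;                // the inserted PHI/GEP increment...
        s += *q;               // ...feeds each access: maps onto lfdu
      }
      return s;
    }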
+
+bool PPCLoopPreIncPrep::updateFormPrep(Loop *L,
+ SmallVector<Bucket, 16> &Buckets) {
+ bool MadeChange = false;
+ if (Buckets.empty())
+ return MadeChange;
+ SmallSet<BasicBlock *, 16> BBChanged;
+ for (auto &Bucket : Buckets)
+ // The base address of each bucket is transformed into a phi and the others
+ // are rewritten based on new base.
+ if (prepareBaseForUpdateFormChain(Bucket))
+ MadeChange |= rewriteLoadStores(L, Bucket, BBChanged);
+ if (MadeChange)
+ for (auto &BB : L->blocks())
+ if (BBChanged.count(BB))
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
// In order to prepare for the pre-increment a PHI is added.
// This function will check to see if that PHI already exists and will return
-// true if it found an existing PHI with the same start and increment as the
-// one we wanted to create.
+// true if it found an existing PHI with the same start and increment as the
+// one we wanted to create.
bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
const SCEV *BasePtrStartSCEV,
const SCEVConstant *BasePtrIncSCEV) {
@@ -216,10 +522,10 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI,
continue;
if (CurrentPHINode->getNumIncomingValues() == 2) {
- if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB &&
- CurrentPHINode->getIncomingBlock(1) == PredBB) ||
- (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
- CurrentPHINode->getIncomingBlock(0) == PredBB) ) {
+ if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(1) == PredBB) ||
+ (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
+ CurrentPHINode->getIncomingBlock(0) == PredBB)) {
if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
@@ -242,89 +548,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
- BasicBlock *Header = L->getHeader();
-
- const PPCSubtarget *ST =
- TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
-
- unsigned HeaderLoopPredCount = pred_size(Header);
-
- // Collect buckets of comparable addresses used by loads and stores.
- SmallVector<Bucket, 16> Buckets;
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J) {
- Value *PtrValue;
- Instruction *MemI;
-
- if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
- MemI = LMemI;
- PtrValue = LMemI->getPointerOperand();
- } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
- MemI = SMemI;
- PtrValue = SMemI->getPointerOperand();
- } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
- MemI = IMemI;
- PtrValue = IMemI->getArgOperand(0);
- } else continue;
- } else continue;
-
- unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
- if (PtrAddrSpace)
- continue;
-
- // There are no update forms for Altivec vector load/stores.
- if (ST && ST->hasAltivec() &&
- PtrValue->getType()->getPointerElementType()->isVectorTy())
- continue;
-
- if (L->isLoopInvariant(PtrValue))
- continue;
-
- const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
- if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
- if (LARSCEV->getLoop() != L)
- continue;
- // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
- // be 4's multiple (DS-form). For i64 loads/stores when the displacement
- // fits in a 16-bit signed field but isn't a multiple of 4, it will be
- // useless and possible to break some original well-form addressing mode
- // to make this pre-inc prep for it.
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
- if (const SCEVConstant *StepConst =
- dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
- const APInt &ConstInt = StepConst->getValue()->getValue();
- if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
- continue;
- }
- }
- } else {
- continue;
- }
-
- bool FoundBucket = false;
- for (auto &B : Buckets) {
- const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
- if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
- B.Elements.push_back(BucketElement(CDiff, MemI));
- FoundBucket = true;
- break;
- }
- }
-
- if (!FoundBucket) {
- if (Buckets.size() == MaxVars)
- return MadeChange;
- Buckets.push_back(Bucket(LSCEV, MemI));
- }
- }
- }
-
- if (Buckets.empty())
- return MadeChange;
-
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
// If there is no loop predecessor, or the loop predecessor's terminator
// returns a value (which might contribute to determining the loop's
@@ -335,191 +558,48 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (LoopPredecessor)
MadeChange = true;
}
- if (!LoopPredecessor)
+ if (!LoopPredecessor) {
+    LLVM_DEBUG(dbgs() << "PIP fails since the current loop has no predecessor.\n");
return MadeChange;
+ }
- LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
-
- SmallSet<BasicBlock *, 16> BBChanged;
- for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
- // The base address of each bucket is transformed into a phi and the others
- // are rewritten as offsets of that variable.
-
- // We have a choice now of which instruction's memory operand we use as the
- // base for the generated PHI. Always picking the first instruction in each
- // bucket does not work well, specifically because that instruction might
- // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
- // the choice is somewhat arbitrary, because the backend will happily
- // generate direct offsets from both the pre-incremented and
- // post-incremented pointer values. Thus, we'll pick the first non-prefetch
- // instruction in each bucket, and adjust the recurrence and other offsets
- // accordingly.
- for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
- if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
- if (II->getIntrinsicID() == Intrinsic::prefetch)
- continue;
-
- // If we'd otherwise pick the first element anyway, there's nothing to do.
- if (j == 0)
- break;
-
- // If our chosen element has no offset from the base pointer, there's
- // nothing to do.
- if (!Buckets[i].Elements[j].Offset ||
- Buckets[i].Elements[j].Offset->isZero())
- break;
-
- const SCEV *Offset = Buckets[i].Elements[j].Offset;
- Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
- for (auto &E : Buckets[i].Elements) {
- if (E.Offset)
- E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
- else
- E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
- }
-
- std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
- break;
- }
-
- const SCEVAddRecExpr *BasePtrSCEV =
- cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
- if (!BasePtrSCEV->isAffine())
- continue;
-
- LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
- assert(BasePtrSCEV->getLoop() == L &&
- "AddRec for the wrong loop?");
-
- // The instruction corresponding to the Bucket's BaseSCEV must be the first
- // in the vector of elements.
- Instruction *MemI = Buckets[i].Elements.begin()->Instr;
- Value *BasePtr = GetPointerOperand(MemI);
- assert(BasePtr && "No pointer operand");
-
- Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
- Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
- BasePtr->getType()->getPointerAddressSpace());
-
- const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
- continue;
-
- const SCEVConstant *BasePtrIncSCEV =
- dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
- if (!BasePtrIncSCEV)
- continue;
- BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
- if (!isSafeToExpand(BasePtrStartSCEV, *SE))
- continue;
-
- LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
-
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
- continue;
-
- PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
- MemI->hasName() ? MemI->getName() + ".phi" : "",
- Header->getFirstNonPHI());
-
- SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
- Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
- LoopPredecessor->getTerminator());
-
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to add it the right number of times.
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- if (*PI != LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
- }
-
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
- PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- if (*PI == LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(PtrInc, *PI);
- }
-
- Instruction *NewBasePtr;
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
- PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
- else
- NewBasePtr = PtrInc;
-
- if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
- BBChanged.insert(IDel->getParent());
- BasePtr->replaceAllUsesWith(NewBasePtr);
- RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
-
- // Keep track of the replacement pointer values we've inserted so that we
- // don't generate more pointer values than necessary.
- SmallPtrSet<Value *, 16> NewPtrs;
- NewPtrs.insert( NewBasePtr);
-
- for (auto I = std::next(Buckets[i].Elements.begin()),
- IE = Buckets[i].Elements.end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->Instr);
- assert(Ptr && "No pointer operand");
- if (NewPtrs.count(Ptr))
- continue;
-
- Instruction *RealNewPtr;
- if (!I->Offset || I->Offset->getValue()->isZero()) {
- RealNewPtr = NewBasePtr;
- } else {
- Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
- if (PtrIP && isa<Instruction>(NewBasePtr) &&
- cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
- PtrIP = nullptr;
- else if (isa<PHINode>(PtrIP))
- PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
- else if (!PtrIP)
- PtrIP = I->Instr;
-
- GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, I->Offset->getValue(),
- I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
- if (!PtrIP)
- NewPtr->insertAfter(cast<Instruction>(PtrInc));
- NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
- RealNewPtr = NewPtr;
+  // Check whether a load/store is a candidate for the update form. This
+  // lambda is passed to collectCandidates, which collects whichever candidate
+  // kinds the given lambda accepts.
+ auto isUpdateFormCandidate = [&] (const Instruction *I,
+ const Value *PtrValue) {
+ assert((PtrValue && I) && "Invalid parameter!");
+ // There are no update forms for Altivec vector load/stores.
+ if (ST && ST->hasAltivec() &&
+ PtrValue->getType()->getPointerElementType()->isVectorTy())
+ return false;
+    // See getPreIndexedAddressParts: the displacement for LDU/STDU has to be
+    // a multiple of 4 (DS-form). For i64 loads/stores, a displacement that
+    // fits in a 16-bit signed field but isn't a multiple of 4 is useless
+    // here, and preparing such a chain for pre-increment may even break an
+    // originally well-formed addressing mode.
+ if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
+ const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
+ const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LARSCEV || LARSCEV->getLoop() != L)
+ return false;
+ if (const SCEVConstant *StepConst =
+ dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
+ const APInt &ConstInt = StepConst->getValue()->getValue();
+ if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
+ return false;
}
-
- if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
- BBChanged.insert(IDel->getParent());
-
- Instruction *ReplNewPtr;
- if (Ptr->getType() != RealNewPtr->getType()) {
- ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
- Ptr->hasName() ? Ptr->getName() + ".cast" : "");
- ReplNewPtr->insertAfter(RealNewPtr);
- } else
- ReplNewPtr = RealNewPtr;
-
- Ptr->replaceAllUsesWith(ReplNewPtr);
- RecursivelyDeleteTriviallyDeadInstructions(Ptr);
-
- NewPtrs.insert(RealNewPtr);
}
+ return true;
+ };
- MadeChange = true;
- }
+  // Collect buckets of comparable addresses used by loads, stores, and the
+  // prefetch intrinsic, as update-form candidates.
+ SmallVector<Bucket, 16> UpdateFormBuckets =
+ collectCandidates(L, isUpdateFormCandidate, MaxVars);
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- if (BBChanged.count(*I))
- DeleteDeadPHIs(*I);
- }
+ // Prepare for update form.
+ if (!UpdateFormBuckets.empty())
+ MadeChange |= updateFormPrep(L, UpdateFormBuckets);
return MadeChange;
}
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 027e6bd1ba06..b6496f189a3a 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
- AsmPrinter &Printer, bool isDarwin) {
+ AsmPrinter &Printer, bool IsDarwin) {
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
@@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
- Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx);
break;
case PPCII::MO_HA:
- Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx);
+ Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx);
break;
}
@@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool isDarwin) {
+ AsmPrinter &AP, bool IsDarwin) {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
- isDarwin))
+ IsDarwin))
OutMI.addOperand(MCOp);
}
}
bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &OutMO, AsmPrinter &AP,
- bool isDarwin) {
+ bool IsDarwin) {
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
@@ -181,17 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
return true;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
- OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin);
return true;
case MachineOperand::MO_JumpTableIndex:
- OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin);
return true;
case MachineOperand::MO_ConstantPoolIndex:
- OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin);
return true;
case MachineOperand::MO_BlockAddress:
OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP,
- isDarwin);
+ IsDarwin);
+ return true;
+ case MachineOperand::MO_MCSymbol:
+ OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin);
return true;
case MachineOperand::MO_RegisterMask:
return false;
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 446246358e96..ac8ac060f460 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -148,8 +148,8 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
if (!Op->isReg())
return nullptr;
- unsigned Reg = Op->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = Op->getReg();
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
return MRI->getVRegDef(Reg);
@@ -344,8 +344,7 @@ bool PPCMIPeephole::simplifyCode(void) {
unsigned TrueReg2 =
TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
- if (TrueReg1 == TrueReg2
- && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) {
MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
@@ -358,7 +357,7 @@ bool PPCMIPeephole::simplifyCode(void) {
return false;
unsigned DefReg =
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ if (Register::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
return true;
@@ -444,7 +443,7 @@ bool PPCMIPeephole::simplifyCode(void) {
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg =
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ if (!Register::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
if (!DefMI)
@@ -453,8 +452,8 @@ bool PPCMIPeephole::simplifyCode(void) {
auto isConvertOfSplat = [=]() -> bool {
if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
return false;
- unsigned ConvReg = DefMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
+ Register ConvReg = DefMI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(ConvReg))
return false;
MachineInstr *Splt = MRI->getVRegDef(ConvReg);
return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
@@ -481,9 +480,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// Splat fed by a shift. Usually when we align value to splat into
// vector element zero.
if (DefOpcode == PPC::XXSLDWI) {
- unsigned ShiftRes = DefMI->getOperand(0).getReg();
- unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
- unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
+ Register ShiftRes = DefMI->getOperand(0).getReg();
+ Register ShiftOp1 = DefMI->getOperand(1).getReg();
+ Register ShiftOp2 = DefMI->getOperand(2).getReg();
unsigned ShiftImm = DefMI->getOperand(3).getImm();
unsigned SplatImm = MI.getOperand(2).getImm();
if (ShiftOp1 == ShiftOp2) {
@@ -507,7 +506,7 @@ bool PPCMIPeephole::simplifyCode(void) {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg =
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ if (!Register::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@@ -518,8 +517,8 @@ bool PPCMIPeephole::simplifyCode(void) {
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
- if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
- !TargetRegisterInfo::isVirtualRegister(DefsReg2))
+ if (!Register::isVirtualRegister(DefsReg1) ||
+ !Register::isVirtualRegister(DefsReg2))
break;
MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
@@ -533,8 +532,8 @@ bool PPCMIPeephole::simplifyCode(void) {
if (RoundInstr->getOpcode() == PPC::FRSP &&
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
Simplified = true;
- unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
- unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
+ Register ConvReg1 = RoundInstr->getOperand(1).getReg();
+ Register FRSPDefines = RoundInstr->getOperand(0).getReg();
MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
if (Use.getOperand(i).isReg() &&
@@ -566,8 +565,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::EXTSH8:
case PPC::EXTSH8_32_64: {
if (!EnableSExtElimination) break;
- unsigned NarrowReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ Register NarrowReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(NarrowReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
@@ -610,8 +609,8 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::EXTSW_32:
case PPC::EXTSW_32_64: {
if (!EnableSExtElimination) break;
- unsigned NarrowReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ Register NarrowReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(NarrowReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
@@ -652,8 +651,8 @@ bool PPCMIPeephole::simplifyCode(void) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
- unsigned TmpReg =
- MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+ Register TmpReg =
+ MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
TmpReg);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
@@ -679,8 +678,8 @@ bool PPCMIPeephole::simplifyCode(void) {
if (MI.getOperand(2).getImm() != 0)
break;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
break;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -695,8 +694,8 @@ bool PPCMIPeephole::simplifyCode(void) {
SrcMI = SubRegMI;
if (SubRegMI->getOpcode() == PPC::COPY) {
- unsigned CopyReg = SubRegMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(CopyReg))
+ Register CopyReg = SubRegMI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(CopyReg))
SrcMI = MRI->getVRegDef(CopyReg);
}
@@ -757,7 +756,7 @@ bool PPCMIPeephole::simplifyCode(void) {
break; // We don't have an ADD fed by LI's that can be transformed
// Now we know that Op1 is the PHI node and Op2 is the dominator
- unsigned DominatorReg = Op2.getReg();
+ Register DominatorReg = Op2.getReg();
const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
? &PPC::G8RC_and_G8RC_NOX0RegClass
@@ -927,7 +926,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
}
else if (Inst->isFullCopy())
NextReg = Inst->getOperand(1).getReg();
- if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
+ if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
break;
SrcReg = NextReg;
}
@@ -949,9 +948,8 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
(*BII).getOpcode() == PPC::BCC &&
(*BII).getOperand(1).isReg()) {
// We optimize only if the condition code is used only by one BCC.
- unsigned CndReg = (*BII).getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
- !MRI->hasOneNonDBGUse(CndReg))
+ Register CndReg = (*BII).getOperand(1).getReg();
+ if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg))
return false;
MachineInstr *CMPI = MRI->getVRegDef(CndReg);
@@ -961,7 +959,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
// We skip this BB if a physical register is used in comparison.
for (MachineOperand &MO : CMPI->operands())
- if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && !Register::isVirtualRegister(MO.getReg()))
return false;
return true;
@@ -1271,8 +1269,8 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
// We touch up the compare instruction in MBB2 and move it to
// a previous BB to handle partially redundant case.
if (SwapOperands) {
- unsigned Op1 = CMPI2->getOperand(1).getReg();
- unsigned Op2 = CMPI2->getOperand(2).getReg();
+ Register Op1 = CMPI2->getOperand(1).getReg();
+ Register Op2 = CMPI2->getOperand(2).getReg();
CMPI2->getOperand(1).setReg(Op2);
CMPI2->getOperand(2).setReg(Op1);
}
@@ -1295,7 +1293,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
DebugLoc DL = CMPI2->getDebugLoc();
- unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
+ Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(MBB2, MBB2.begin(), DL,
TII->get(PPC::PHI), NewVReg)
.addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
@@ -1334,8 +1332,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
if (MI.getOpcode() != PPC::RLDICR)
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -1414,8 +1412,8 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
if (SHMI + MEMI != 63)
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
@@ -1428,6 +1426,12 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
if (!MRI->hasOneNonDBGUse(SrcReg))
return false;
+ assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
+ assert(SrcMI->getOperand(1).isReg() &&
+ "EXTSW's second operand should be a register");
+ if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ return false;
+
LLVM_DEBUG(dbgs() << "Combining pair: ");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(MI.dump());
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index d83c92276800..b1c0433641dd 100644
--- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -57,6 +57,109 @@ namespace {
MachineFunctionProperties::Property::NoVRegs);
}
+  // This function removes any redundant load immediates. It has two nested
+  // loops: the outer loop finds a load immediate BBI that could be used to
+  // replace subsequent redundant ones; the inner loop scans the instructions
+  // after BBI for that redundancy, updating kill/dead flags accordingly. If
+  // AfterBBI loads the same immediate into the same register as BBI, it is
+  // redundant; otherwise, any instruction that modifies the def register of
+  // BBI ends the scan. DeadOrKillToUnset is a pointer to the previous operand
+  // that had the kill/dead flag set; it tracks the def register of BBI, the
+  // use registers of the AfterBBIs, and the def registers of the AfterBBIs.
+ bool removeRedundantLIs(MachineBasicBlock &MBB,
+ const TargetRegisterInfo *TRI) {
+ LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
+ MBB.dump(); dbgs() << "\n");
+
+ DenseSet<MachineInstr *> InstrsToErase;
+ for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+      // Skip a load immediate that is already marked for erasure; it cannot
+      // be used to replace any other instruction.
+ if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
+ continue;
+ // Skip non-load immediate.
+ unsigned Opc = BBI->getOpcode();
+ if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
+ Opc != PPC::LIS8)
+ continue;
+      // Skip a load immediate whose operand is a relocation (e.g., $r3 =
+      // LI target-flags(ppc-lo) %const.0).
+ if (!BBI->getOperand(1).isImm())
+ continue;
+ assert(BBI->getOperand(0).isReg() &&
+ "Expected a register for the first operand");
+
+ LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
+
+ Register Reg = BBI->getOperand(0).getReg();
+ int64_t Imm = BBI->getOperand(1).getImm();
+ MachineOperand *DeadOrKillToUnset = nullptr;
+ if (BBI->getOperand(0).isDead()) {
+ DeadOrKillToUnset = &BBI->getOperand(0);
+ LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
+ << " from load immediate " << *BBI
+                          << " is an unsetting candidate\n");
+ }
+ // This loop scans instructions after BBI to see if there is any
+ // redundant load immediate.
+ for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
+ ++AfterBBI) {
+        // Track the operand that kills Reg. We will unset the kill flag of
+        // that operand if there is a following redundant load immediate.
+ int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+ if (KillIdx != -1) {
+ assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
+ DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
+ LLVM_DEBUG(dbgs()
+ << " Kill flag of " << *DeadOrKillToUnset << " from "
+                     << *AfterBBI << " is an unsetting candidate\n");
+ }
+
+ if (!AfterBBI->modifiesRegister(Reg, TRI))
+ continue;
+        // Finish scanning because Reg is overwritten by an instruction that
+        // is not the same kind of load immediate.
+ if (AfterBBI->getOpcode() != Opc)
+ break;
+ assert(AfterBBI->getOperand(0).isReg() &&
+ "Expected a register for the first operand");
+ // Finish scanning because Reg is overwritten by a relocation or a
+ // different value.
+ if (!AfterBBI->getOperand(1).isImm() ||
+ AfterBBI->getOperand(1).getImm() != Imm)
+ break;
+
+        // It loads the same immediate value into the same Reg, which makes it
+        // redundant. We first unset the kill flag of the previous use of Reg
+        // to extend Reg's live range, then remove the redundancy.
+ if (DeadOrKillToUnset) {
+ LLVM_DEBUG(dbgs()
+ << " Unset dead/kill flag of " << *DeadOrKillToUnset
+ << " from " << *DeadOrKillToUnset->getParent());
+ if (DeadOrKillToUnset->isDef())
+ DeadOrKillToUnset->setIsDead(false);
+ else
+ DeadOrKillToUnset->setIsKill(false);
+ }
+ DeadOrKillToUnset =
+ AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
+ if (DeadOrKillToUnset)
+ LLVM_DEBUG(dbgs()
+ << " Dead flag of " << *DeadOrKillToUnset << " from "
+                     << *AfterBBI << " is an unsetting candidate\n");
+ InstrsToErase.insert(&*AfterBBI);
+ LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
+ AfterBBI->dump());
+ }
+ }
+
+ for (MachineInstr *MI : InstrsToErase) {
+ MI->eraseFromParent();
+ }
+ NumRemovedInPreEmit += InstrsToErase.size();
+ return !InstrsToErase.empty();
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole)
return false;
@@ -65,6 +168,7 @@ namespace {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
+ Changed |= removeRedundantLIs(MBB, TRI);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
// Detect self copies - these can result from running AADB.
@@ -111,7 +215,7 @@ namespace {
if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
continue;
MachineInstr *CRSetMI = nullptr;
- unsigned CRBit = Br->getOperand(0).getReg();
+ Register CRBit = Br->getOperand(0).getReg();
unsigned CRReg = getCRFromCRBit(CRBit);
bool SeenUse = false;
MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
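For illustration, a minimal MIR-style sketch of the pattern the new
removeRedundantLIs peephole targets (the registers, offsets, and store
opcodes here are hypothetical, not taken from a real test case):

  ; Before the peephole:
  $r3 = LI 0
  STW killed $r3, 4, $r1   ; kill flag cleared to extend r3's live range
  $r3 = LI 0               ; same immediate into the same register: redundant
  STW killed $r3, 8, $r1
  ; After: the second LI is erased and the first store no longer kills r3.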
diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 3a83cc27439c..6e9042643820 100644
--- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
for (auto SI = Splats.begin(); SI != Splats.end();) {
MachineInstr *SMI = *SI;
- unsigned SplatReg = SMI->getOperand(0).getReg();
- unsigned SrcReg = SMI->getOperand(1).getReg();
+ Register SplatReg = SMI->getOperand(0).getReg();
+ Register SrcReg = SMI->getOperand(1).getReg();
if (MI->modifiesRegister(SrcReg, TRI)) {
switch (MI->getOpcode()) {
@@ -102,7 +102,7 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
// the QPX splat source register.
unsigned SubRegIndex =
TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
- unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
+ Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
// Substitute both the explicit defined register, and also the
// implicit def of the containing QPX register.
diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 8eaa6dfe2bf7..3b71ed219c17 100644
--- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -381,10 +381,10 @@ private:
const MachineBranchProbabilityInfo *MBPI;
// A vector to contain all the CR logical operations
- std::vector<CRLogicalOpInfo> AllCRLogicalOps;
+ SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
void initialize(MachineFunction &MFParm);
void collectCRLogicals();
- bool handleCROp(CRLogicalOpInfo &CRI);
+ bool handleCROp(unsigned Idx);
bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
static bool isCRLogical(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -398,7 +398,7 @@ private:
// Not using a range-based for loop here as the vector may grow while being
// operated on.
for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
- Changed |= handleCROp(AllCRLogicalOps[i]);
+ Changed |= handleCROp(i);
return Changed;
}
@@ -535,15 +535,15 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
unsigned &Subreg,
MachineInstr *&CpDef) {
Subreg = -1;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return nullptr;
MachineInstr *Copy = MRI->getVRegDef(Reg);
CpDef = Copy;
if (!Copy->isCopy())
return Copy;
- unsigned CopySrc = Copy->getOperand(1).getReg();
+ Register CopySrc = Copy->getOperand(1).getReg();
Subreg = Copy->getOperand(1).getSubReg();
- if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) {
+ if (!Register::isVirtualRegister(CopySrc)) {
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
// Set the Subreg
if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
@@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
/// a unary CR logical might be used to change the condition code on a
/// comparison feeding it. A nullary CR logical might simply be removable
/// if the user of the bit it [un]sets can be transformed.
-bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) {
+bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
// We can definitely split a block on the inputs to a binary CR operation
// whose defs and (single) use are within the same block.
bool Changed = false;
+ CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
CRI.DefsSingleUse) {
Changed = splitBlockOnBinaryCROp(CRI);
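Passing an index instead of a reference here is not cosmetic: handleCROp can
append to AllCRLogicalOps while the loop in the caller runs, and growing a
SmallVector may reallocate its storage, invalidating outstanding references.
A minimal sketch of the hazard being avoided (names hypothetical):

  llvm::SmallVector<int, 2> V = {1, 2};
  int &Ref = V[0];   // reference into the vector's current storage
  V.push_back(3);    // may reallocate; Ref is now dangling
  int Copy = V[1];   // copying by index on each use stays valid
  V.push_back(4);    // Copy is unaffected by later growth

This is also why the new handleCROp body copies the element by value rather
than binding a reference to it.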
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 12554ea8d079..9ec26a19bdaa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -325,13 +325,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool IsPositionIndependent = TM.isPositionIndependent();
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
+ if (Subtarget.is32BitELFABI() && IsPositionIndependent)
markSuperRegs(Reserved, PPC::R29);
else
markSuperRegs(Reserved, PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
+ if (Subtarget.is32BitELFABI() && IsPositionIndependent)
markSuperRegs(Reserved, PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -391,7 +391,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!TM.isPPC64())
@@ -425,7 +425,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::G8RC_NOX0RegClassID:
case PPC::GPRC_NOR0RegClassID:
case PPC::SPERCRegClassID:
- case PPC::SPE4RCRegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -527,7 +526,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
if (LP64)
@@ -549,7 +548,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
}
bool KillNegSizeReg = MI.getOperand(1).isKill();
- unsigned NegSizeReg = MI.getOperand(1).getReg();
+ Register NegSizeReg = MI.getOperand(1).getReg();
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
@@ -655,8 +654,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
@@ -700,8 +699,8 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -744,8 +743,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
// Search up the BB to find the definition of the CR bit.
MachineBasicBlock::reverse_iterator Ins;
@@ -823,8 +822,8 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CRBIT does not define its destination");
@@ -833,7 +832,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
- unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ Register RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
.addReg(getCRFromCRBit(DestReg));
@@ -870,8 +869,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ Register SrcReg = MI.getOperand(0).getReg();
BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
@@ -896,8 +895,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
- unsigned DestReg = MI.getOperand(0).getReg();
+ Register Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ Register DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_VRSAVE does not define its destination");
@@ -1128,7 +1127,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
+ Register StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index af0dff6347a6..4719e947b172 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -253,15 +253,14 @@ def RM: PPCReg<"**ROUNDING MODE**">;
/// Register classes
// Allocate volatiles first
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
-def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
- (sequence "R%u", 30, 13),
- R31, R0, R1, FP, BP)> {
+def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, FP, BP)> {
// On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
// put it at the end of the list.
let AltOrders = [(add (sub GPRC, R2), R2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -272,21 +271,19 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
// put it at the end of the list.
let AltOrders = [(add (sub G8RC, X2), X2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
// prevent r0 from being allocated for use by those instructions.
-def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> {
// On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
// put it at the end of the list.
let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -295,8 +292,7 @@ def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
// put it at the end of the list.
let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
let AltOrderSelect = [{
- const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
- return S.isPPC64() && S.isSVR4ABI();
+ return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
}];
}
@@ -304,8 +300,6 @@ def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12),
(sequence "S%u", 30, 13),
S31, S0, S1)>;
-def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>;
-
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
// allocated non-volatile register with the lowest register number, as FP
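Adding f32 to GPRC and GPRC_NOR0 lets SPE targets keep single-precision
values directly in the general-purpose registers, which is what makes the
separate SPE4RC class (removed above) unnecessary. Illustrative SPE assembly
(a sketch, assuming an SPE-enabled subtarget):

  # f32 arithmetic on SPE operates on the GPRs themselves:
  efsadd 3, 3, 4    # single-precision add with r3/r4 as operands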
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 6aa7528634d3..10568ed4b655 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -60,7 +60,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU,
InstrInfo(*this), TLInfo(TM, *this) {}
void PPCSubtarget::initializeEnvironment() {
- StackAlignment = 16;
+ StackAlignment = Align(16);
DarwinDirective = PPC::DIR_NONE;
HasMFOCRF = false;
Has64BitSupport = false;
@@ -145,7 +145,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isDarwin())
HasLazyResolverStubs = true;
- if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+ if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
+ TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
TargetTriple.isMusl())
SecurePlt = true;
@@ -228,18 +229,13 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
-unsigned char
-PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const {
- // Note that currently we don't generate non-pic references.
- // If a caller wants that, this will have to be updated.
-
+bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
// Large code model always uses the TOC even for local symbols.
if (TM.getCodeModel() == CodeModel::Large)
- return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
-
+ return true;
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return PPCII::MO_PIC_FLAG;
- return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+ return false;
+ return true;
}
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
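The new predicate condenses the old flag-returning classifier: every caller
received MO_PIC_FLAG unconditionally, so the only remaining question is
whether the access is indirect. Roughly equivalent logic, spelled out (a
sketch, not an exact call site):

  bool Indirect = TM.getCodeModel() == CodeModel::Large ||
                  !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
  // Indirect == Subtarget.isGVIndirectSymbol(GV)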
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 55fec1cb6d99..d96c2893aee9 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -78,7 +78,7 @@ protected:
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
- unsigned StackAlignment;
+ Align StackAlignment;
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
@@ -166,7 +166,7 @@ public:
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
- unsigned getStackAlignment() const { return StackAlignment; }
+ Align getStackAlignment() const { return StackAlignment; }
/// getDarwinDirective - Returns the -m directive specified for the cpu.
///
@@ -210,7 +210,11 @@ public:
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
// useSoftFloat - Return true if soft-float option is turned on.
- bool useSoftFloat() const { return !HasHardFloat; }
+ bool useSoftFloat() const {
+ if (isAIXABI() && !HasHardFloat)
+ report_fatal_error("soft-float is not yet supported on AIX.");
+ return !HasHardFloat;
+ }
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
/// registers in 32-bit mode when possible. This can only true if
@@ -277,11 +281,11 @@ public:
bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
- unsigned getPlatformStackAlignment() const {
+ Align getPlatformStackAlignment() const {
if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
- return 32;
+ return Align(32);
- return 16;
+ return Align(16);
}
// DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no
@@ -316,6 +320,9 @@ public:
bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
bool isELFv2ABI() const;
+ bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
+ bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
+
/// Originally, this function return hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
bool enableEarlyIfConversion() const override { return true; }
@@ -337,9 +344,8 @@ public:
bool enableSubRegLiveness() const override;
- /// classifyGlobalReference - Classify a global variable reference for the
- /// current subtarget accourding to how we should reference it.
- unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+ /// True if the GV will be accessed via an indirect symbol.
+ bool isGVIndirectSymbol(const GlobalValue *GV) const;
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
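StackAlignment and getPlatformStackAlignment now use llvm::Align, which
encodes a power-of-two alignment in the type system instead of as a raw
unsigned. A small usage sketch (per llvm/Support/Alignment.h):

  Align A(16);             // asserts at construction unless a power of two
  uint64_t N = A.value();  // 16, for interfaces that still want an integer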
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index fb826c4a32f1..8f313d9d01c4 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -74,8 +74,8 @@ protected:
LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
- unsigned OutReg = MI.getOperand(0).getReg();
- unsigned InReg = MI.getOperand(1).getReg();
+ Register OutReg = MI.getOperand(0).getReg();
+ Register InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 3eb0569fb955..895ae6744421 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -95,7 +95,8 @@ namespace {
protected:
bool hasTOCLoReloc(const MachineInstr &MI) {
if (MI.getOpcode() == PPC::LDtocL ||
- MI.getOpcode() == PPC::ADDItocL)
+ MI.getOpcode() == PPC::ADDItocL ||
+ MI.getOpcode() == PPC::LWZtocL)
return true;
for (const MachineOperand &MO : MI.operands()) {
@@ -109,11 +110,15 @@ protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ const bool isPPC64 =
+ MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+ const unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
+
for (auto &MI : MBB) {
if (!hasTOCLoReloc(MI))
continue;
- MI.addOperand(MachineOperand::CreateReg(PPC::X2,
+ MI.addOperand(MachineOperand::CreateReg(TOCReg,
false /*IsDef*/,
true /*IsImp*/));
Changed = true;
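With LWZtocL handled and the base register chosen by word size, the implicit
operand the pass appends now matches the ABI in use. Schematically (MIR-style,
with hypothetical operands):

  LDtocL  %0, @var, %1, implicit $x2   ; 64-bit ELF: TOC base in X2
  LWZtocL %0, @var, %1, implicit $r2   ; 32-bit ELF PIC: GOT base in R2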
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ce00f848dd72..abefee8b339d 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner",
static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -185,12 +185,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- // If it isn't a Mach-O file then it's going to be a linux ELF
- // object file.
if (TT.isOSDarwin())
- return llvm::make_unique<TargetLoweringObjectFileMachO>();
+ return std::make_unique<TargetLoweringObjectFileMachO>();
+
+ if (TT.isOSAIX())
+ return std::make_unique<TargetLoweringObjectFileXCOFF>();
- return llvm::make_unique<PPC64LinuxTargetObjectFile>();
+ return std::make_unique<PPC64LinuxTargetObjectFile>();
}
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
@@ -248,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
- if (!TT.isOSDarwin() && !JIT &&
- (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
- return CodeModel::Medium;
- return CodeModel::Small;
+
+ if (JIT)
+ return CodeModel::Small;
+ if (TT.isOSAIX())
+ return CodeModel::Small;
+
+ assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based.");
+
+ if (TT.isArch32Bit())
+ return CodeModel::Small;
+
+ assert(TT.isArch64Bit() && "Unsupported PPC architecture.");
+ return CodeModel::Medium;
}
@@ -259,8 +269,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
ScheduleDAGMILive *DAG =
new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
- llvm::make_unique<PPCPreRASchedStrategy>(C) :
- llvm::make_unique<GenericScheduler>(C));
+ std::make_unique<PPCPreRASchedStrategy>(C) :
+ std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
@@ -271,8 +281,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
ScheduleDAGMI *DAG =
new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
- llvm::make_unique<PPCPostRASchedStrategy>(C) :
- llvm::make_unique<PostGenericScheduler>(C), true);
+ std::make_unique<PPCPostRASchedStrategy>(C) :
+ std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
return DAG;
}
@@ -328,7 +338,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(
+ I = std::make_unique<PPCSubtarget>(
TargetTriple, CPU,
// FIXME: It would be good to have the subtarget additions here
// not necessary. Anything that turns them on/off (overrides) ends
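The rewritten getEffectivePPCCodeModel makes the default selection explicit.
Summarizing the logic above:

  explicit -code-model  -> honored (the kernel model is rejected with a
                           fatal error)
  JIT                   -> Small
  AIX (XCOFF)           -> Small
  32-bit ELF            -> Small
  64-bit ELF            -> Medium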
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ff3dfbfaca05..f51300c656aa 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() {
return true;
}
-unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
- if (Vector && !ST->hasAltivec() && !ST->hasQPX())
- return 0;
- return ST->hasVSX() ? 64 : 32;
+unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ assert(ClassID == GPRRC || ClassID == FPRRC ||
+ ClassID == VRRC || ClassID == VSXRC);
+ if (ST->hasVSX()) {
+ assert(ClassID == GPRRC || ClassID == VSXRC);
+ return ClassID == GPRRC ? 32 : 64;
+ }
+ assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
+ return 32;
+}
+
+unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
+ if (Vector)
+ return ST->hasVSX() ? VSXRC : VRRC;
+ else if (Ty && Ty->getScalarType()->isFloatTy())
+ return ST->hasVSX() ? VSXRC : FPRRC;
+ else
+ return GPRRC;
+}
+
+const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
+
+ switch (ClassID) {
+ default:
+ llvm_unreachable("unknown register class");
+ return "PPC::unknown register class";
+ case GPRRC: return "PPC::GPRRC";
+ case FPRRC: return "PPC::FPRRC";
+ case VRRC: return "PPC::VRRC";
+ case VSXRC: return "PPC::VSXRC";
+ }
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
@@ -613,7 +640,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTIImpl::getCacheLineSize() {
+unsigned PPCTTIImpl::getCacheLineSize() const {
// Check first if the user specified a custom line size.
if (CacheLineSize.getNumOccurrences() > 0)
return CacheLineSize;
@@ -628,7 +655,7 @@ unsigned PPCTTIImpl::getCacheLineSize() {
return 64;
}
-unsigned PPCTTIImpl::getPrefetchDistance() {
+unsigned PPCTTIImpl::getPrefetchDistance() const {
// This seems like a reasonable default for the BG/Q (this pass is enabled, by
// default, only on the BG/Q).
return 300;
@@ -752,6 +779,35 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return 0;
return Cost;
+
+ } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+ if (ST->hasP9Altivec()) {
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ // A move-to VSR and a permute/insert. Assume vector operation cost
+ // for both (cost will be 2x on P9).
+ return vectorCostAdjustment(2, Opcode, Val, nullptr);
+
+ // It's an extract. Maybe we can do a cheap move-from VSR.
+ unsigned EltSize = Val->getScalarSizeInBits();
+ if (EltSize == 64) {
+ unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
+ if (Index == MfvsrdIndex)
+ return 1;
+ } else if (EltSize == 32) {
+ unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
+ if (Index == MfvsrwzIndex)
+ return 1;
+ }
+
+ // We need a vector extract (or mfvsrld). Assume vector operation cost.
+ // The cost of the load constant for a vector extract is disregarded
+ // (invariant, easily schedulable).
+ return vectorCostAdjustment(1, Opcode, Val, nullptr);
+
+ } else if (ST->hasDirectMove())
+ // Assume permute has standard cost.
+ // Assume move-to/move-from VSR have 2x standard cost.
+ return 3;
}
// Estimated cost of a load-hit-store delay. This was obtained
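The integer extract costs above key off whether the requested lane lines up
with what a direct move can read. On a little-endian Power9, for example (an
illustration of the logic, not measured numbers):

  extractelement <2 x i64> %v, i32 1  ; lane matches mfvsrd  -> cost 1
  extractelement <2 x i64> %v, i32 0  ; needs a swap first   -> vector-op cost
  extractelement <4 x i32> %v, i32 2  ; lane matches mfvsrwz -> cost 1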
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 5d76ee418b69..83a70364bf68 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -72,10 +72,16 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
- unsigned getNumberOfRegisters(bool Vector);
+
+ enum PPCRegisterClass {
+ GPRRC, FPRRC, VRRC, VSXRC
+ };
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+ const char* getRegisterClassName(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
- unsigned getCacheLineSize();
- unsigned getPrefetchDistance();
+ unsigned getCacheLineSize() const override;
+ unsigned getPrefetchDistance() const override;
unsigned getMaxInterleaveFactor(unsigned VF);
int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
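Taken together, the three new hooks let target-independent code reason about
register pressure per register class rather than per scalar/vector split. A
hypothetical caller-side sketch:

  unsigned CID = TTI.getRegisterClassForType(/*Vector=*/false, F32Ty);
  // FPRRC without VSX, VSXRC with it
  unsigned NumRegs = TTI.getNumberOfRegisters(CID);     // 32 or 64
  LLVM_DEBUG(dbgs() << TTI.getRegisterClassName(CID));  // "PPC::FPRRC"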
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 719ed7b63878..3463bbbdc5f0 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -50,7 +50,7 @@ namespace {
bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
MachineRegisterInfo &MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
return RC->hasSubClassEq(MRI.getRegClass(Reg));
} else if (RC->contains(Reg)) {
return true;
@@ -102,7 +102,7 @@ protected:
IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
- unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ Register NewVReg = MRI.createVirtualRegister(SrcRC);
BuildMI(MBB, MI, MI.getDebugLoc(),
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
@@ -124,7 +124,7 @@ protected:
"Unknown destination for a VSX copy");
// Copy the VSX value into a new VSX register of the correct subclass.
- unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ Register NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg)
.add(SrcMO);
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index ce78239df0a8..5e150be544ed 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -126,8 +126,8 @@ protected:
if (!AddendMI->isFullCopy())
continue;
- unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ Register AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(AddendSrcReg)) {
if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
MRI.getRegClass(AddendSrcReg))
continue;
@@ -182,12 +182,12 @@ protected:
// %5 = A-form-op %5, %5, %11;
// where %5 and %11 are both kills. This case would be skipped
// otherwise.
- unsigned OldFMAReg = MI.getOperand(0).getReg();
+ Register OldFMAReg = MI.getOperand(0).getReg();
// Find one of the product operands that is killed by this instruction.
unsigned KilledProdOp = 0, OtherProdOp = 0;
- unsigned Reg2 = MI.getOperand(2).getReg();
- unsigned Reg3 = MI.getOperand(3).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
+ Register Reg3 = MI.getOperand(3).getReg();
if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
&& Reg2 != OldFMAReg) {
KilledProdOp = 2;
@@ -208,14 +208,14 @@ protected:
// legality checks above, the live range for the addend source register
// could be extended), but it seems likely that such a trivial copy can
// be coalesced away later, and thus is not worth the effort.
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+ if (Register::isVirtualRegister(AddendSrcReg) &&
!LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
continue;
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
- unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg();
- unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg();
+ Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
+ Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
@@ -314,7 +314,7 @@ protected:
// Extend the live interval of the addend source (it might end at the
// copy to be removed, or somewhere in between there and here). This
// is necessary only if it is a physical register.
- if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+ if (!Register::isVirtualRegister(AddendSrcReg))
for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
++Units) {
unsigned Unit = *Units;
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 44175af7f9b6..c3729da0b07b 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -158,7 +158,7 @@ private:
// Return true iff the given register is in the given class.
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return RC->hasSubClassEq(MRI->getRegClass(Reg));
return RC->contains(Reg);
}
@@ -253,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (isAnyVecReg(Reg, Partial)) {
RelevantInstr = true;
break;
@@ -566,7 +566,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
CopySrcReg = MI->getOperand(2).getReg();
}
- if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
+ if (!Register::isVirtualRegister(CopySrcReg)) {
if (!isScalarVecReg(CopySrcReg))
SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
@@ -601,11 +601,11 @@ void PPCVSXSwapRemoval::formWebs() {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!isVecReg(Reg) && !isScalarVecReg(Reg))
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
if (!(MI->isCopy() && isScalarVecReg(Reg)))
SwapVector[EntryIdx].MentionsPhysVR = 1;
continue;
@@ -667,7 +667,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
// than a swap instruction.
else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
// We skip debug instructions in the analysis. (Note that debug
// location information is still maintained by this optimization
@@ -695,9 +695,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
// other than a swap instruction.
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned UseReg = MI->getOperand(0).getReg();
+ Register UseReg = MI->getOperand(0).getReg();
MachineInstr *DefMI = MRI->getVRegDef(UseReg);
- unsigned DefReg = DefMI->getOperand(0).getReg();
+ Register DefReg = DefMI->getOperand(0).getReg();
int DefIdx = SwapMap[DefMI];
if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
@@ -756,7 +756,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
if (!SwapVector[Repr].WebRejected) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
int UseIdx = SwapMap[&UseMI];
@@ -772,7 +772,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
if (!SwapVector[Repr].WebRejected) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
- unsigned UseReg = MI->getOperand(0).getReg();
+ Register UseReg = MI->getOperand(0).getReg();
MachineInstr *DefMI = MRI->getVRegDef(UseReg);
int DefIdx = SwapMap[DefMI];
SwapVector[DefIdx].WillRemove = 1;
@@ -869,8 +869,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
Selector = 3 - Selector;
MI->getOperand(3).setImm(Selector);
- unsigned Reg1 = MI->getOperand(1).getReg();
- unsigned Reg2 = MI->getOperand(2).getReg();
+ Register Reg1 = MI->getOperand(1).getReg();
+ Register Reg2 = MI->getOperand(2).getReg();
MI->getOperand(1).setReg(Reg2);
MI->getOperand(2).setReg(Reg1);
@@ -894,9 +894,9 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
LLVM_DEBUG(MI->dump());
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
MI->getOperand(0).setReg(NewVReg);
LLVM_DEBUG(dbgs() << " Into: ");
@@ -910,8 +910,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// prior to the swap, and from VSRC to VRRC following the swap.
// Coalescing will usually remove all this mess.
if (DstRC == &PPC::VRRCRegClass) {
- unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
- unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
+ Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), VSRCTmp1)