Diffstat (limited to 'lib/Target/PowerPC')
45 files changed, 2172 insertions, 1244 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index c9524da93acd..aedf5b713c3f 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -579,7 +579,7 @@ public:
   static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
                                                  bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Token);
+    auto Op = std::make_unique<PPCOperand>(Token);
     Op->Tok.Data = Str.data();
     Op->Tok.Length = Str.size();
     Op->StartLoc = S;
@@ -608,7 +608,7 @@ public:
   static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
                                                bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Immediate);
+    auto Op = std::make_unique<PPCOperand>(Immediate);
     Op->Imm.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
@@ -618,7 +618,7 @@ public:
   static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
                                                 SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(Expression);
+    auto Op = std::make_unique<PPCOperand>(Expression);
     Op->Expr.Val = Val;
     Op->Expr.CRVal = EvaluateCRExpr(Val);
     Op->StartLoc = S;
@@ -629,7 +629,7 @@ public:
   static std::unique_ptr<PPCOperand>
   CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(TLSRegister);
+    auto Op = std::make_unique<PPCOperand>(TLSRegister);
     Op->TLSReg.Sym = Sym;
     Op->StartLoc = S;
     Op->EndLoc = E;
@@ -639,7 +639,7 @@ public:
   static std::unique_ptr<PPCOperand>
   CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
-    auto Op = make_unique<PPCOperand>(ContextImmediate);
+    auto Op = std::make_unique<PPCOperand>(ContextImmediate);
     Op->Imm.Val = Val;
     Op->StartLoc = S;
     Op->EndLoc = E;
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 7a8af57961cb..3597fd15eeb1 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -167,12 +167,6 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
   return decodeRegisterClass(Inst, RegNo, QFRegs);
 }
 
-static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                              uint64_t Address,
-                                              const void *Decoder) {
-  return decodeRegisterClass(Inst, RegNo, RRegs);
-}
-
 static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 042ddf48d5df..20f752c3041a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -78,7 +78,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
   // determine the type of the relocation
   unsigned Type;
   if (IsPCRel) {
-    switch ((unsigned)Fixup.getKind()) {
+    switch (Fixup.getTargetKind()) {
     default:
       llvm_unreachable("Unimplemented");
     case PPC::fixup_ppc_br24:
@@ -131,7 +131,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
       break;
     }
   } else {
-    switch ((unsigned)Fixup.getKind()) {
+    switch (Fixup.getTargetKind()) {
     default:
       llvm_unreachable("invalid fixup kind!");
     case FK_NONE:
       Type = ELF::R_PPC_NONE;
@@ -443,5 +443,5 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
 
 std::unique_ptr<MCObjectTargetWriter>
 llvm::createPPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) {
-  return llvm::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
+  return std::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
 }
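The hunks above are mechanical cleanups: LLVM's pre-C++14 stopgap llvm::make_unique gives way to std::make_unique, and fixup-kind switches use MCFixup::getTargetKind(), which returns the kind as an unsigned and removes the explicit cast. A minimal sketch of the allocation pattern, with a hypothetical Operand type standing in for PPCOperand:

    #include <memory>

    struct Operand {
      explicit Operand(int Kind) : Kind(Kind) {}
      int Kind;
    };

    std::unique_ptr<Operand> createOperand(int Kind) {
      // Was: llvm::make_unique<Operand>(Kind) before the codebase moved to C++14.
      return std::make_unique<Operand>(Kind);
    }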
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 0e64ae55ab1c..7fc231618fa9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -66,6 +66,31 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
 
 void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                StringRef Annot, const MCSubtargetInfo &STI) {
+  // Customize printing of the addis instruction on AIX. When an operand is a
+  // symbol reference, the instruction syntax is changed to look like a load
+  // operation, i.e:
+  //   Transform:  addis $rD, $rA, $src --> addis $rD, $src($rA).
+  if (TT.isOSAIX() &&
+      (MI->getOpcode() == PPC::ADDIS8 || MI->getOpcode() == PPC::ADDIS) &&
+      MI->getOperand(2).isExpr()) {
+    assert((MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) &&
+           "The first and the second operand of an addis instruction"
+           " should be registers.");
+
+    assert(isa<MCSymbolRefExpr>(MI->getOperand(2).getExpr()) &&
+           "The third operand of an addis instruction should be a symbol "
+           "reference expression if it is an expression at all.");
+
+    O << "\taddis ";
+    printOperand(MI, 0, O);
+    O << ", ";
+    printOperand(MI, 2, O);
+    O << "(";
+    printOperand(MI, 1, O);
+    O << ")";
+    return;
+  }
+
   // Check for slwi/srwi mnemonics.
   if (MI->getOpcode() == PPC::RLWINM) {
     unsigned char SH = MI->getOperand(2).getImm();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 5f0005ea1d7b..1216cd727289 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -86,4 +86,5 @@ void PPCXCOFFMCAsmInfo::anchor() {}
 PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
   assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
   CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+  ZeroDirective = "\t.space\t";
 }
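To make the printer rewrite above concrete, here is a toy model of the new branch. The stream-based printing mirrors the printOperand calls in the hunk, but the function signature and string operands are illustrative, not the real MCInst API:

    #include <ostream>
    #include <string>

    // Renders "addis rD, sym(rA)" -- the AIX load-like form -- instead of the
    // generic three-operand "addis rD, rA, sym" used for plain immediates.
    void printAddisWithSymbol(std::ostream &O, const std::string &RD,
                              const std::string &RA, const std::string &Sym) {
      O << "\taddis " << RD << ", " << Sym << "(" << RA << ")";
    }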
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index d467f5c4a439..fb9dd5d7aa75 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -19,8 +19,8 @@ using namespace llvm;
 const PPCMCExpr*
 PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
-                  bool isDarwin, MCContext &Ctx) {
-  return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+                  bool IsDarwin, MCContext &Ctx) {
+  return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin);
 }
 
 void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 449e2c34f74d..ad1454566162 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -45,21 +45,21 @@ public:
   /// @{
 
   static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
-                                 bool isDarwin, MCContext &Ctx);
+                                 bool IsDarwin, MCContext &Ctx);
 
   static const PPCMCExpr *createLo(const MCExpr *Expr,
-                                   bool isDarwin, MCContext &Ctx) {
-    return create(VK_PPC_LO, Expr, isDarwin, Ctx);
+                                   bool IsDarwin, MCContext &Ctx) {
+    return create(VK_PPC_LO, Expr, IsDarwin, Ctx);
   }
 
   static const PPCMCExpr *createHi(const MCExpr *Expr,
-                                   bool isDarwin, MCContext &Ctx) {
-    return create(VK_PPC_HI, Expr, isDarwin, Ctx);
+                                   bool IsDarwin, MCContext &Ctx) {
+    return create(VK_PPC_HI, Expr, IsDarwin, Ctx);
   }
 
   static const PPCMCExpr *createHa(const MCExpr *Expr,
-                                   bool isDarwin, MCContext &Ctx) {
-    return create(VK_PPC_HA, Expr, isDarwin, Ctx);
+                                   bool IsDarwin, MCContext &Ctx) {
+    return create(VK_PPC_HA, Expr, IsDarwin, Ctx);
   }
 
   /// @}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 4cf7fd15fa75..672f910ab086 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout,
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
   // On Mach-O, ppc_fixup_half16 relocations must refer to the
   // start of the instruction, not the second halfword, as ELF does
-  if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+  if (Fixup.getTargetKind() == PPC::fixup_ppc_half16)
     FixupOffset &= ~uint32_t(3);
   return FixupOffset;
 }
@@ -376,5 +376,5 @@ void PPCMachObjectWriter::RecordPPCRelocation(
 std::unique_ptr<MCObjectTargetWriter>
 llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
                                 uint32_t CPUSubtype) {
-  return llvm::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
+  return std::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 9c661286d455..7fdbb8990b55 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -25,5 +25,5 @@ PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
 
 std::unique_ptr<MCObjectTargetWriter>
 llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
-  return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+  return std::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
 }
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index 2a10322d3f49..f6cd8ed00c82 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -64,6 +64,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
     XXLAND,
     XXLANDC,
     XXLEQV,
+    XXLEQVOnes,
     XXLNAND,
     XXLNOR,
     XXLOR,
@@ -124,8 +125,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
     (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
     (instregex "MFV(S)?RD$"),
-    (instregex "MTVSRD$"),
-    (instregex "MTVSRW(A|Z)$"),
+    (instregex "MTV(S)?RD$"),
+    (instregex "MTV(S)?RW(A|Z)$"),
     (instregex "CMP(WI|LWI|W|LW)(8)?$"),
     (instregex "CMP(L)?D(I)?$"),
     (instregex "SUBF(I)?C(8)?$"),
@@ -148,7 +149,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
     (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
     (instregex "ADD(4|8)(TLS)?(_)?$"),
     (instregex "NEG(8)?$"),
-    (instregex "ADDI(S)?toc(HA|L)$"),
+    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
     COPY,
     MCRF,
     MCRXRX,
@@ -158,6 +159,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
     XSNEGDP,
     XSCPSGNDP,
     MFVSRWZ,
+    MFVRWZ,
     EXTSWSLI,
     SRADI_32,
     RLDIC,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c6951ab67b08..0534773c4c9e 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -50,10 +50,10 @@ namespace llvm {
   FunctionPass *createPPCExpandISELPass();
   FunctionPass *createPPCPreEmitPeepholePass();
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
-                                    AsmPrinter &AP, bool isDarwin);
+                                    AsmPrinter &AP, bool IsDarwin);
   bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
                                          MCOperand &OutMO, AsmPrinter &AP,
-                                         bool isDarwin);
+                                         bool IsDarwin);
 
   void initializePPCCTRLoopsPass(PassRegistry&);
 #ifndef NDEBUG
@@ -86,8 +86,8 @@ namespace llvm {
     MO_NO_FLAG,
 
     /// On a symbol operand "FOO", this indicates that the reference is actually
-    /// to "FOO@plt". This is used for calls and jumps to external functions on
-    /// for PIC calls on Linux and ELF systems.
+    /// to "FOO@plt". This is used for calls and jumps to external functions
+    /// and for PIC calls on 32-bit ELF systems.
     MO_PLT = 1,
 
     /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
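For readers unfamiliar with the flag whose documentation is corrected above: at the MC layer a @plt reference is expressed through the symbol-ref variant kind rather than through the symbol name. A minimal sketch, assuming only the standard MCExpr API:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    // Builds an expression that prints as "callee@plt"; a branch carrying it
    // is routed through the procedure linkage table at link time.
    const MCExpr *makePltRef(MCSymbol *Callee, MCContext &Ctx) {
      return MCSymbolRefExpr::create(Callee, MCSymbolRefExpr::VK_PLT, Ctx);
    }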
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bd87ce06b4fb..66236b72a1a3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -51,9 +51,11 @@
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -76,7 +78,7 @@ namespace {
 
 class PPCAsmPrinter : public AsmPrinter {
 protected:
-  MapVector<MCSymbol *, MCSymbol *> TOC;
+  MapVector<const MCSymbol *, MCSymbol *> TOC;
   const PPCSubtarget *Subtarget;
   StackMaps SM;
 
@@ -87,7 +89,7 @@ public:
 
   StringRef getPassName() const override { return "PowerPC Assembly Printer"; }
 
-  MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+  MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym);
 
   bool doInitialization(Module &M) override {
     if (!TOC.empty())
@@ -164,6 +166,14 @@ public:
       : PPCAsmPrinter(TM, std::move(Streamer)) {}
 
   StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+
+  void SetupMachineFunction(MachineFunction &MF) override;
+
+  void EmitGlobalVariable(const GlobalVariable *GV) override;
+
+  void EmitFunctionDescriptor() override;
+
+  void EmitEndOfAsmFile(Module &) override;
 };
 
 } // end anonymous namespace
@@ -265,7 +275,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
         return true; // This operand uses VSX numbering.
       // If the operand is a VMX register, convert it to a VSX register.
-      unsigned Reg = MI->getOperand(OpNo).getReg();
+      Register Reg = MI->getOperand(OpNo).getReg();
       if (PPCInstrInfo::isVRRegister(Reg))
         Reg = PPC::VSX32 + (Reg - PPC::V0);
       else if (PPCInstrInfo::isVFRegister(Reg))
@@ -328,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
 /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
 /// exists for it.  If not, create one.  Then return a symbol that references
 /// the TOC entry.
-MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) {
   MCSymbol *&TOCEntry = TOC[Sym];
   if (!TOCEntry)
     TOCEntry = createTempSymbol("C");
@@ -378,7 +388,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
   if (CallTarget) {
     assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
            "High 16 bits of call target should be zero.");
-    unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+    Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
     EncodedBytes = 0;
     // Materialize the jump address:
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
@@ -502,13 +512,32 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
                                   .addExpr(SymVar));
 }
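Several hunks in this file (and in the passes below) replace raw unsigned register numbers with the Register wrapper. The wrapper keeps the same bit pattern and converts implicitly, so call sites like MO.getReg() keep working; the classification helpers move from TargetRegisterInfo to Register itself. A minimal sketch of the pattern:

    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    // Was TargetRegisterInfo::isVirtualRegister / isPhysicalRegister before
    // this migration; the predicates are now static members of Register.
    bool isVirt(Register Reg) { return Register::isVirtualRegister(Reg); }
    bool isPhys(Register Reg) { return Register::isPhysicalRegister(Reg); }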
+/// Map a machine operand for a TOC pseudo-machine instruction to its
+/// corresponding MCSymbol.
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
+                                           AsmPrinter &AP) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_GlobalAddress:
+    return AP.getSymbol(MO.getGlobal());
+  case MachineOperand::MO_ConstantPoolIndex:
+    return AP.GetCPISymbol(MO.getIndex());
+  case MachineOperand::MO_JumpTableIndex:
+    return AP.GetJTISymbol(MO.getIndex());
+  case MachineOperand::MO_BlockAddress:
+    return AP.GetBlockAddressSymbol(MO.getBlockAddress());
+  default:
+    llvm_unreachable("Unexpected operand type to get symbol.");
+  }
+}
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
 void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MCInst TmpInst;
-  bool isPPC64 = Subtarget->isPPC64();
-  bool isDarwin = TM.getTargetTriple().isOSDarwin();
+  const bool IsDarwin = TM.getTargetTriple().isOSDarwin();
+  const bool IsPPC64 = Subtarget->isPPC64();
+  const bool IsAIX = Subtarget->isAIXABI();
   const Module *M = MF->getFunction().getParent();
   PICLevel::Level PL = M->getPICLevel();
 
@@ -517,7 +546,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   if (!MI->isInlineAsm()) {
     for (const MachineOperand &MO: MI->operands()) {
       if (MO.isReg()) {
-        unsigned Reg = MO.getReg();
+        Register Reg = MO.getReg();
         if (Subtarget->hasSPE()) {
           if (PPC::F4RCRegClass.contains(Reg) ||
               PPC::F8RCRegClass.contains(Reg) ||
@@ -595,7 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     //        addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
     //        addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
     // Get the offset from the GOT Base Register to the GOT
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
     if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
       unsigned PICR = TmpInst.getOperand(0).getReg();
       MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
@@ -646,43 +675,57 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
   }
   case PPC::LWZtoc: {
-    // Transform %r3 = LWZtoc @min1, %r2
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    assert(!IsDarwin && "TOC is an ELF/XCOFF construct.");
+
+    // Transform %rN = LWZtoc @op1, %r2
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Change the opcode to LWZ, and the global address operand to be a
-    // reference to the GOT entry we will synthesize later.
+    // Change the opcode to LWZ.
     TmpInst.setOpcode(PPC::LWZ);
+
     const MachineOperand &MO = MI->getOperand(1);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for LWZtoc.");
 
-    // Map symbol -> label of TOC entry
-    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
-    MCSymbol *MOSymbol = nullptr;
-    if (MO.isGlobal())
-      MOSymbol = getSymbol(MO.getGlobal());
-    else if (MO.isCPI())
-      MOSymbol = GetCPISymbol(MO.getIndex());
-    else if (MO.isJTI())
-      MOSymbol = GetJTISymbol(MO.getIndex());
-    else if (MO.isBlockAddress())
-      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
-
-    if (PL == PICLevel::SmallPIC) {
+    // Map the operand to its corresponding MCSymbol.
+    const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    // Create a reference to the GOT entry for the symbol. The GOT entry will be
+    // synthesized later.
+    if (PL == PICLevel::SmallPIC && !IsAIX) {
       const MCExpr *Exp =
         MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT,
                                 OutContext);
       TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
-    } else {
-      MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+      EmitToStreamer(*OutStreamer, TmpInst);
+      return;
+    }
 
-      const MCExpr *Exp =
-          MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None,
-                                  OutContext);
-      const MCExpr *PB =
-          MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")),
-                                  OutContext);
-      Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+    // Otherwise, use the TOC. 'TOCEntry' is a label used to reference the
+    // storage allocated in the TOC which contains the address of
+    // 'MOSymbol'. Said TOC entry will be synthesized later.
+    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+    const MCExpr *Exp =
+        MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext);
+
+    // AIX uses the label directly as the lwz displacement operand for
+    // references into the toc section. The displacement value will be generated
+    // relative to the toc-base.
+    if (IsAIX) {
+      assert(
+          TM.getCodeModel() == CodeModel::Small &&
+          "This pseudo should only be selected for 32-bit small code model.");
       TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+      EmitToStreamer(*OutStreamer, TmpInst);
+      return;
     }
+
+    // Create an explicit subtract expression between the local symbol and
+    // '.LTOC' to manifest the toc-relative offset.
+    const MCExpr *PB = MCSymbolRefExpr::create(
+        OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext);
+    Exp = MCBinaryExpr::createSub(Exp, PB, OutContext);
+    TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
@@ -690,72 +733,121 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::LDtocCPT:
   case PPC::LDtocBA:
   case PPC::LDtoc: {
+    assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
     // Transform %x3 = LDtoc @min1, %x2
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Change the opcode to LD, and the global address operand to be a
-    // reference to the TOC entry we will synthesize later.
+    // Change the opcode to LD.
     TmpInst.setOpcode(PPC::LD);
+
     const MachineOperand &MO = MI->getOperand(1);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand!");
+
+    // Map the machine operand to its corresponding MCSymbol, then map the
+    // global address operand to be a reference to the TOC entry we will
+    // synthesize later.
+    MCSymbol *TOCEntry =
+        lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
+
+    const MCSymbolRefExpr::VariantKind VK =
+        IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
+    const MCExpr *Exp =
+        MCSymbolRefExpr::create(TOCEntry, VK, OutContext);
+    TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
+    EmitToStreamer(*OutStreamer, TmpInst);
+    return;
+  }
+  case PPC::ADDIStocHA: {
+    assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) &&
+           "This pseudo should only be selected for 32-bit large code model on"
+           " AIX.");
+
+    // Transform %rd = ADDIStocHA %rA, @sym(%r2)
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Map symbol -> label of TOC entry
-    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
-    MCSymbol *MOSymbol = nullptr;
-    if (MO.isGlobal())
-      MOSymbol = getSymbol(MO.getGlobal());
-    else if (MO.isCPI())
-      MOSymbol = GetCPISymbol(MO.getIndex());
-    else if (MO.isJTI())
-      MOSymbol = GetJTISymbol(MO.getIndex());
-    else if (MO.isBlockAddress())
-      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+    // Change the opcode to ADDIS.
+    TmpInst.setOpcode(PPC::ADDIS);
+
+    const MachineOperand &MO = MI->getOperand(2);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for ADDIStocHA.");
+
+    // Map the machine operand to its corresponding MCSymbol.
+    MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    // Always use TOC on AIX. Map the global address operand to be a reference
+    // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+    // reference the storage allocated in the TOC which contains the address of
+    // 'MOSymbol'.
     MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+    const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+                                                MCSymbolRefExpr::VK_PPC_U,
+                                                OutContext);
+    TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
+    EmitToStreamer(*OutStreamer, TmpInst);
+    return;
+  }
+  case PPC::LWZtocL: {
+    assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large &&
+           "This pseudo should only be selected for 32-bit large code model on"
+           " AIX.");
 
-    const MCExpr *Exp =
-        MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
-                                OutContext);
+    // Transform %rd = LWZtocL @sym, %rs.
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+
+    // Change the opcode to lwz.
+    TmpInst.setOpcode(PPC::LWZ);
+
+    const MachineOperand &MO = MI->getOperand(1);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for LWZtocL.");
+
+    // Map the machine operand to its corresponding MCSymbol.
+    MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    // Always use TOC on AIX. Map the global address operand to be a reference
+    // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
+    // reference the storage allocated in the TOC which contains the address of
+    // 'MOSymbol'.
+    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+    const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
+                                                MCSymbolRefExpr::VK_PPC_L,
+                                                OutContext);
     TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
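As a sketch of what the two new 32-bit AIX cases print (register numbers and the TOC-entry label are made up; the @u and @l modifiers correspond to VK_PPC_U and VK_PPC_L above):

    // 32-bit large code model on AIX: address a TOC entry in two halves,
    // with the toc-base in r2.
    //
    //   addis 3, L..C0@u(2)   // ADDIStocHA: upper 16 bits of the TOC offset
    //   lwz   3, L..C0@l(3)   // LWZtocL:    lower 16 bits complete the load
    //
    // L..C0 labels TOC storage holding the address of the referenced symbol.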
+  case PPC::ADDIStocHA8: {
+    assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
 
-  case PPC::ADDIStocHA: {
-    // Transform %xd = ADDIStocHA %x2, @sym
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    // Transform %xd = ADDIStocHA8 %x2, @sym
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Change the opcode to ADDIS8.  If the global address is external, has
-    // common linkage, is a non-local function address, or is a jump table
-    // address, then generate a TOC entry and reference that.  Otherwise
-    // reference the symbol directly.
+    // Change the opcode to ADDIS8. If the global address is the address of
+    // an external symbol, is a jump table address, is a block address, or is a
+    // constant pool index with large code model enabled, then generate a TOC
+    // entry and reference that. Otherwise, reference the symbol directly.
     TmpInst.setOpcode(PPC::ADDIS8);
+
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
-            MO.isBlockAddress()) &&
-           "Invalid operand for ADDIStocHA!");
-    MCSymbol *MOSymbol = nullptr;
-    bool GlobalToc = false;
-
-    if (MO.isGlobal()) {
-      const GlobalValue *GV = MO.getGlobal();
-      MOSymbol = getSymbol(GV);
-      unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
-      GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
-    } else if (MO.isCPI()) {
-      MOSymbol = GetCPISymbol(MO.getIndex());
-    } else if (MO.isJTI()) {
-      MOSymbol = GetJTISymbol(MO.getIndex());
-    } else if (MO.isBlockAddress()) {
-      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
-    }
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
+           "Invalid operand for ADDIStocHA8!");
+
+    const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    const bool GlobalToc =
+        MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
 
     if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
-        TM.getCodeModel() == CodeModel::Large)
+        (MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
 
+    const MCSymbolRefExpr::VariantKind VK =
+        IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
+
     const MCExpr *Exp =
-        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
-                                OutContext);
+        MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
 
     if (!MO.isJTI() && MO.getOffset())
       Exp = MCBinaryExpr::createAdd(Exp,
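A sketch of the assembly the ADDIStocHA8 family ultimately produces on 64-bit ELF (registers illustrative; .LC0 stands for a synthesized TOC entry when indirection is required):

    // Medium/large code model TOC access, high half then low half:
    //
    //   addis 4, 2, .LC0@toc@ha   // ADDIStocHA8 -> ADDIS8 with @toc@ha
    //   ld    4, .LC0@toc@l(4)    // LDtocL      -> LD with @toc@l
    //
    // When the symbol can be accessed directly, ADDItocL substitutes
    //   addi 4, 4, sym@toc@l
    // for the load, computing the address instead of loading it from the TOC.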
@@ -768,73 +860,59 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
   case PPC::LDtocL: {
+    assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
+
     // Transform %xd = LDtocL @sym, %xs
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Change the opcode to LD.  If the global address is external, has
-    // common linkage, or is a jump table address, then reference the
-    // associated TOC entry.  Otherwise reference the symbol directly.
+    // Change the opcode to LD. If the global address is the address of
+    // an external symbol, is a jump table address, is a block address, or is
+    // a constant pool index with large code model enabled, then generate a
+    // TOC entry and reference that. Otherwise, reference the symbol directly.
     TmpInst.setOpcode(PPC::LD);
+
     const MachineOperand &MO = MI->getOperand(1);
     assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
             MO.isBlockAddress()) &&
            "Invalid operand for LDtocL!");
-    MCSymbol *MOSymbol = nullptr;
-    if (MO.isJTI())
-      MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
-    else if (MO.isBlockAddress()) {
-      MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
-      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
-    }
-    else if (MO.isCPI()) {
-      MOSymbol = GetCPISymbol(MO.getIndex());
-      if (TM.getCodeModel() == CodeModel::Large)
-        MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
-    }
-    else if (MO.isGlobal()) {
-      const GlobalValue *GV = MO.getGlobal();
-      MOSymbol = getSymbol(GV);
-      LLVM_DEBUG(
-        unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
-        assert((GVFlags & PPCII::MO_NLP_FLAG) &&
-               "LDtocL used on symbol that could be accessed directly is "
-               "invalid. Must match ADDIStocHA."));
+
+    LLVM_DEBUG(assert(
+        (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+        "LDtocL used on symbol that could be accessed directly is "
+        "invalid. Must match ADDIStocHA8."));
+
+    const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
+
+    if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
       MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
-    }
 
+    const MCSymbolRefExpr::VariantKind VK =
+        IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO;
     const MCExpr *Exp =
-        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
-                                OutContext);
+        MCSymbolRefExpr::create(MOSymbol, VK, OutContext);
     TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
   }
   case PPC::ADDItocL: {
     // Transform %xd = ADDItocL %xs, @sym
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
-    // Change the opcode to ADDI8.  If the global address is external, then
-    // generate a TOC entry and reference that.  Otherwise reference the
-    // symbol directly.
+    // Change the opcode to ADDI8. If the global address is external, then
+    // generate a TOC entry and reference that. Otherwise, reference the
+    // symbol directly.
     TmpInst.setOpcode(PPC::ADDI8);
+
     const MachineOperand &MO = MI->getOperand(2);
-    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
-    MCSymbol *MOSymbol = nullptr;
-
-    if (MO.isGlobal()) {
-      const GlobalValue *GV = MO.getGlobal();
-      LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
-                 assert(!(GVFlags & PPCII::MO_NLP_FLAG) &&
-                        "Interposable definitions must use indirect access."));
-      MOSymbol = getSymbol(GV);
-    } else if (MO.isCPI()) {
-      MOSymbol = GetCPISymbol(MO.getIndex());
-    }
+    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL.");
+
+    LLVM_DEBUG(assert(
+        !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) &&
+        "Interposable definitions must use indirect access."));
 
     const MCExpr *Exp =
-        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
-                                OutContext);
+        MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
+                                MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
     TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
@@ -842,13 +920,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::ADDISgotTprelHA: {
     // Transform: %xd = ADDISgotTprelHA %x2, @sym
     // Into:      %xd = ADDIS8 %x2, sym@got@tlsgd@ha
-    assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+    assert(IsPPC64 && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymGotTprel =
-        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
-                                OutContext);
+        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+                                OutContext);
     EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8)
                                      .addReg(MI->getOperand(0).getReg())
                                      .addReg(MI->getOperand(1).getReg())
@@ -858,16 +936,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::LDgotTprelL:
   case PPC::LDgotTprelL32: {
     // Transform %xd = LDgotTprelL @sym, %xs
-    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
 
     // Change the opcode to LD.
-    TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
+    TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ);
     const MachineOperand &MO = MI->getOperand(1);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
-    const MCExpr *Exp =
-        MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
-                                OutContext);
+    const MCExpr *Exp = MCSymbolRefExpr::create(
+        MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO
+                          : MCSymbolRefExpr::VK_PPC_GOT_TPREL,
+        OutContext);
     TmpInst.getOperand(1) = MCOperand::createExpr(Exp);
     EmitToStreamer(*OutStreamer, TmpInst);
     return;
@@ -920,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::ADDIStlsgdHA: {
     // Transform: %xd = ADDIStlsgdHA %x2, @sym
     // Into:      %xd = ADDIS8 %x2, sym@got@tlsgd@ha
-    assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+    assert(IsPPC64 && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
@@ -943,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create(
-        MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
-                                       : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+        MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO
+                          : MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
         OutContext);
     EmitToStreamer(*OutStreamer,
-                   MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
                        .addReg(MI->getOperand(0).getReg())
                        .addReg(MI->getOperand(1).getReg())
                        .addExpr(SymGotTlsGD));
@@ -965,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case PPC::ADDIStlsldHA: {
     // Transform: %xd = ADDIStlsldHA %x2, @sym
     // Into:      %xd = ADDIS8 %x2, sym@got@tlsld@ha
-    assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC");
+    assert(IsPPC64 && "Not supported for 32-bit PowerPC");
     const MachineOperand &MO = MI->getOperand(2);
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
@@ -988,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const GlobalValue *GValue = MO.getGlobal();
     MCSymbol *MOSymbol = getSymbol(GValue);
     const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create(
-        MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
-                                       : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+        MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO
+                          : MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
         OutContext);
     EmitToStreamer(*OutStreamer,
-                   MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
                        .addReg(MI->getOperand(0).getReg())
                        .addReg(MI->getOperand(1).getReg())
                        .addExpr(SymGotTlsLD));
@@ -1021,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                                 OutContext);
     EmitToStreamer(
         *OutStreamer,
-        MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+        MCInstBuilder(IsPPC64 ? PPC::ADDIS8 : PPC::ADDIS)
             .addReg(MI->getOperand(0).getReg())
            .addReg(MI->getOperand(1).getReg())
             .addExpr(SymDtprel));
@@ -1040,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
         MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
                                 OutContext);
     EmitToStreamer(*OutStreamer,
-                   MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+                   MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI)
                        .addReg(MI->getOperand(0).getReg())
                        .addReg(MI->getOperand(1).getReg())
                        .addExpr(SymDtprel));
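The TLS pseudos in these hunks document their own lowering in the "Transform/Into" comments. Stitched together, the 64-bit general-dynamic sequence comes out roughly as below; register choices are illustrative, and the final call is produced by a separate tls-call pseudo that this diff does not touch:

    //   addis 3, 2, tlsvar@got@tlsgd@ha    // ADDIStlsgdHA
    //   addi  3, 3, tlsvar@got@tlsgd@l     // ADDItlsgdL
    //   bl    __tls_get_addr(tlsvar@tlsgd) // emitted by the TLS call lowering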
@@ -1087,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // suite shows a handful of test cases that fail this check for
     // Darwin. Those need to be investigated before this sanity test
     // can be enabled for those subtargets.
-    if (!Subtarget->isDarwin()) {
+    if (!IsDarwin) {
       unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
       const MachineOperand &MO = MI->getOperand(OpNum);
       if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
@@ -1098,7 +1177,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
   }
 
-  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
@@ -1368,15 +1447,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
             ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
     OutStreamer->SwitchSection(Section);
 
-    for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
-         E = TOC.end(); I != E; ++I) {
-      OutStreamer->EmitLabel(I->second);
-      MCSymbol *S = I->first;
+    for (const auto &TOCMapPair : TOC) {
+      const MCSymbol *const TOCEntryTarget = TOCMapPair.first;
+      MCSymbol *const TOCEntryLabel = TOCMapPair.second;
+
+      OutStreamer->EmitLabel(TOCEntryLabel);
       if (isPPC64) {
-        TS.emitTCEntry(*S);
+        TS.emitTCEntry(*TOCEntryTarget);
       } else {
         OutStreamer->EmitValueToAlignment(4);
-        OutStreamer->EmitSymbolValue(S, 4);
+        OutStreamer->EmitSymbolValue(TOCEntryTarget, 4);
       }
     }
   }
@@ -1602,7 +1682,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (!Stubs.empty()) {
     // Switch with ".non_lazy_symbol_pointer" directive.
     OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
-    EmitAlignment(isPPC64 ? 3 : 2);
+    EmitAlignment(isPPC64 ? Align(8) : Align(4));
 
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
@@ -1643,6 +1723,106 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   return AsmPrinter::doFinalization(M);
 }
 
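For reference, the TOC-emission loop in PPCLinuxAsmPrinter::doFinalization above renders each map pair roughly as follows; label, symbol, and the exact 32-bit alignment directive are illustrative:

    //   64-bit (via emitTCEntry):      32-bit:
    //     .LC0:                          .LC0:
    //       .tc var[TC],var                .p2align 2
    //                                      .long var
    //
    // The .tc directive is how the 64-bit ELF target streamer spells a
    // TOC entry; the 32-bit path just stores the symbol's address.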
+void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+  // Get the function descriptor symbol.
+  CurrentFnDescSym = getSymbol(&MF.getFunction());
+  // Set the containing csect.
+  MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection(
+      CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+      XCOFF::C_HIDEXT, SectionKind::getData());
+  cast<MCSymbolXCOFF>(CurrentFnDescSym)->setContainingCsect(FnDescSec);
+
+  return AsmPrinter::SetupMachineFunction(MF);
+}
+
+void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  // Early error checking limiting what is supported.
+  if (GV->isThreadLocal())
+    report_fatal_error("Thread local not yet supported on AIX.");
+
+  if (GV->hasSection())
+    report_fatal_error("Custom section for Data not yet supported.");
+
+  if (GV->hasComdat())
+    report_fatal_error("COMDAT not yet supported by AIX.");
+
+  SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
+  if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData())
+    report_fatal_error("Encountered a global variable kind that is "
+                       "not supported yet.");
+
+  // Create the containing csect and switch to it.
+  MCSectionXCOFF *CSect = cast<MCSectionXCOFF>(
+      getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
+  OutStreamer->SwitchSection(CSect);
+
+  // Create the symbol, set its storage class, and emit it.
+  MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
+  GVSym->setStorageClass(
+      TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
+  GVSym->setContainingCsect(CSect);
+
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+
+  // Handle common symbols.
+  if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+    unsigned Align =
+        GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV);
+    uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+
+    if (GVKind.isBSSLocal())
+      OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+    else
+      OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+    return;
+  }
+
+  MCSymbol *EmittedInitSym = GVSym;
+  EmitLinkage(GV, EmittedInitSym);
+  EmitAlignment(getGVAlignment(GV, DL), GV);
+  OutStreamer->EmitLabel(EmittedInitSym);
+  EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+}
+
+void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
+  const DataLayout &DL = getDataLayout();
+  const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4;
+
+  MCSectionSubPair Current = OutStreamer->getCurrentSection();
+  // Emit function descriptor.
+  OutStreamer->SwitchSection(
+      cast<MCSymbolXCOFF>(CurrentFnDescSym)->getContainingCsect());
+  OutStreamer->EmitLabel(CurrentFnDescSym);
+  // Emit function entry point address.
+  OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
+                         PointerSize);
+  // Emit TOC base address.
+  MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+  OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
+                         PointerSize);
+  // Emit a null environment pointer.
+  OutStreamer->EmitIntValue(0, PointerSize);
+
+  OutStreamer->SwitchSection(Current.first, Current.second);
+}
+
+void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
+  // If there are no functions in this module, we will never need to reference
+  // the TOC base.
+  if (M.empty())
+    return;
+
+  // Emit TOC base.
+  MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+  MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
+      StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+      SectionKind::getData());
+  cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+  // Switch to section to emit TOC base.
+  OutStreamer->SwitchSection(TOCBaseSection);
+}
+
 /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
 /// for a MachineFunction to the given output stream, in a format that the
 /// Darwin assembler can deal with.
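EmitFunctionDescriptor above writes the classic three-word AIX descriptor. On 32-bit it comes out roughly as below (csect and label spellings are illustrative; each field doubles to 8 bytes on 64-bit):

    //   .csect foo[DS]
    //   foo:
    //     .long .foo       // entry point address of the function's code
    //     .long TOC[TC0]   // TOC base anchor, emitted in EmitEndOfAsmFile
    //     .long 0          // environment pointer, null for C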
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 5e9a661f8f0b..d325b078979f 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -340,9 +340,10 @@ bool PPCBranchCoalescing::identicalOperands(
 
     if (Op1.isIdenticalTo(Op2)) {
       // filter out instructions with physical-register uses
-      if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg())
-          // If the physical register is constant then we can assume the value
-          // has not changed between uses.
+      if (Op1.isReg() &&
+          Register::isPhysicalRegister(Op1.getReg())
+          // If the physical register is constant then we can assume the value
+          // has not changed between uses.
           && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) {
         LLVM_DEBUG(dbgs() << "The operands are not provably identical.\n");
         return false;
@@ -355,8 +356,8 @@ bool PPCBranchCoalescing::identicalOperands(
     // definition of the register produces the same value. If they produce the
     // same value, consider them to be identical.
     if (Op1.isReg() && Op2.isReg() &&
-        TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
-        TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+        Register::isVirtualRegister(Op1.getReg()) &&
+        Register::isVirtualRegister(Op2.getReg())) {
       MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
       MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
       if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
@@ -456,7 +457,7 @@ bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI,
                     << TargetMBB.getNumber() << "\n");
 
   for (auto &Use : MI.uses()) {
-    if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+    if (Use.isReg() && Register::isVirtualRegister(Use.getReg())) {
       MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
       if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
         LLVM_DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 793d690baec3..cdff4d383d23 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -81,21 +81,20 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
 /// original Offset.
 unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
                                          unsigned Offset) {
-  unsigned Align = MBB.getAlignment();
-  if (!Align)
+  const Align Alignment = MBB.getAlignment();
+  if (Alignment == Align::None())
     return 0;
 
-  unsigned AlignAmt = 1 << Align;
-  unsigned ParentAlign = MBB.getParent()->getAlignment();
+  const Align ParentAlign = MBB.getParent()->getAlignment();
 
-  if (Align <= ParentAlign)
-    return OffsetToAlignment(Offset, AlignAmt);
+  if (Alignment <= ParentAlign)
+    return offsetToAlignment(Offset, Alignment);
 
   // The alignment of this MBB is larger than the function's alignment, so we
   // can't tell whether or not it will insert nops. Assume that it will.
   if (FirstImpreciseBlock < 0)
     FirstImpreciseBlock = MBB.getNumber();
-  return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+  return Alignment.value() + offsetToAlignment(Offset, Alignment);
 }
 
 /// We need to be careful about the offset of the first block in the function
@@ -179,7 +178,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
                                const MachineBasicBlock *Dest,
                                unsigned BrOffset) {
   int BranchSize;
-  unsigned MaxAlign = 2;
+  Align MaxAlign = Align(4);
   bool NeedExtraAdjustment = false;
   if (Dest->getNumber() <= Src->getNumber()) {
     // If this is a backwards branch, the delta is the offset from the
@@ -192,8 +191,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
       BranchSize += BlockSizes[DestBlock].first;
       for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
         BranchSize += BlockSizes[i].first;
-        MaxAlign = std::max(MaxAlign,
-                            Fn.getBlockNumbered(i)->getAlignment());
+        MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
       }
 
       NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -207,8 +205,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
       MaxAlign = std::max(MaxAlign, Dest->getAlignment());
       for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
         BranchSize += BlockSizes[i].first;
-        MaxAlign = std::max(MaxAlign,
-                            Fn.getBlockNumbered(i)->getAlignment());
+        MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
       }
 
       NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
@@ -258,7 +255,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn,
   // The computed offset is at most ((1 << alignment) - 4) bytes smaller
   // than actual offset. So we add this number to the offset for safety.
   if (NeedExtraAdjustment)
-    BranchSize += (1 << MaxAlign) - 4;
+    BranchSize += MaxAlign.value() - 4;
 
   return BranchSize;
 }
@@ -339,16 +336,16 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
           // 1. CR register
           // 2. Target MBB
           PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
-          unsigned CRReg = I->getOperand(1).getReg();
+          Register CRReg = I->getOperand(1).getReg();
 
           // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
           BuildMI(MBB, I, dl, TII->get(PPC::BCC))
              .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
         } else if (I->getOpcode() == PPC::BC) {
-          unsigned CRBit = I->getOperand(0).getReg();
+          Register CRBit = I->getOperand(0).getReg();
           BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
         } else if (I->getOpcode() == PPC::BCn) {
-          unsigned CRBit = I->getOperand(0).getReg();
+          Register CRBit = I->getOperand(0).getReg();
           BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
         } else if (I->getOpcode() == PPC::BDNZ) {
           BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
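The PPCBranchSelector changes above swap log2-encoded unsigned alignments for the typed Align from llvm/Support/Alignment.h, and the padding helper is renamed from OffsetToAlignment to offsetToAlignment. A minimal sketch of the computation as it reads after the change, assuming only that header:

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    // Bytes of padding needed to raise Offset to alignment A; with Align(16)
    // and Offset 20 this returns 12, matching the nops the assembler would
    // insert before an aligned block.
    uint64_t paddingBytes(uint64_t Offset, Align A) {
      return offsetToAlignment(Offset, A);
    }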
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 264d6b590f95..d8425d89da92 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -162,7 +162,7 @@ class PPCFastISel final : public FastISel {
     bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                     bool isZExt, unsigned DestReg,
                     const PPC::Predicate Pred);
-    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                      const TargetRegisterClass *RC, bool IsZExt = true,
                      unsigned FP64LoadOpc = PPC::LFD);
     bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
@@ -451,7 +451,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
 // Emit a load instruction if possible, returning true if we succeeded,
 // otherwise false. See commentary below for how the register class of
 // the load is determined.
-bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                               const TargetRegisterClass *RC,
                               bool IsZExt, unsigned FP64LoadOpc) {
   unsigned Opc;
@@ -469,7 +469,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
     (ResultReg ? MRI.getRegClass(ResultReg) :
      (RC ? RC :
       (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
-      (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) :
+      (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
       (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
        &PPC::GPRC_and_GPRC_NOR0RegClass)))));
@@ -612,7 +612,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
   const TargetRegisterClass *RC =
     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
 
-  unsigned ResultReg = 0;
+  Register ResultReg = 0;
   if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
                    PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
     return false;
@@ -989,7 +989,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
   unsigned DestReg;
   auto RC = MRI.getRegClass(SrcReg);
   if (PPCSubTarget->hasSPE()) {
-    DestReg = createResultReg(&PPC::SPE4RCRegClass);
+    DestReg = createResultReg(&PPC::GPRCRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
       TII.get(PPC::EFSCFD), DestReg)
       .addReg(SrcReg);
@@ -1051,7 +1051,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
   }
 
   const TargetRegisterClass *RC = &PPC::F8RCRegClass;
-  unsigned ResultReg = 0;
+  Register ResultReg = 0;
   if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
     return 0;
 
@@ -1176,7 +1176,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
   const TargetRegisterClass *RC =
     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
 
-  unsigned ResultReg = 0;
+  Register ResultReg = 0;
   if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
     return 0;
 
@@ -1229,9 +1229,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   if (PPCSubTarget->hasSPE()) {
     DestReg = createResultReg(&PPC::GPRCRegClass);
     if (IsSigned)
-      Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
+      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
     else
-      Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
   } else if (isVSFRCRegClass(RC)) {
     DestReg = createResultReg(&PPC::VSFRCRegClass);
     if (DstVT == MVT::i32)
@@ -1717,7 +1717,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
 
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
       CCValAssign &VA = ValLocs[0];
-      unsigned RetReg = VA.getLocReg();
+      Register RetReg = VA.getLocReg();
       // We still need to worry about properly extending the sign. For example,
       // we could have only a single bit or a constant that needs zero
       // extension rather than sign extension. Make sure we pass the return
@@ -2002,7 +2002,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
   const bool HasSPE = PPCSubTarget->hasSPE();
   const TargetRegisterClass *RC;
   if (HasSPE)
-    RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass);
+    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
   else
     RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
 
@@ -2031,8 +2031,8 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
       .addImm(0).addReg(TmpReg).addMemOperand(MMO);
   } else {
-    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
             TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
     // But for large code model, we must generate a LDtocL followed
     // by the LF[SD].
@@ -2085,16 +2085,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
   // or externally available linkage, a non-local function address, or a
   // jump table address (not yet needed), or if we are generating code
   // for large code model, we generate:
-  //       LDtocL(GV, ADDIStocHA(%x2, GV))
+  //       LDtocL(GV, ADDIStocHA8(%x2, GV))
   // Otherwise we generate:
-  //       ADDItocL(ADDIStocHA(%x2, GV), GV)
-  // Either way, start with the ADDIStocHA:
+  //       ADDItocL(ADDIStocHA8(%x2, GV), GV)
+  // Either way, start with the ADDIStocHA8:
   unsigned HighPartReg = createResultReg(RC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
           HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
 
-  unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
-  if (GVFlags & PPCII::MO_NLP_FLAG) {
+  if (PPCSubTarget->isGVIndirectSymbol(GV)) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
             DestReg).addGlobalAddress(GV).addReg(HighPartReg);
   } else {
@@ -2353,7 +2352,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   if (!PPCComputeAddress(LI->getOperand(0), Addr))
     return false;
 
-  unsigned ResultReg = MI->getOperand(0).getReg();
+  Register ResultReg = MI->getOperand(0).getReg();
 
   if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
                    PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
@@ -2464,7 +2463,7 @@ namespace llvm {
                                         const TargetLibraryInfo *LibInfo) {
     // Only available on 64-bit ELF for now.
     const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
-    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
+    if (Subtarget.is64BitELFABI())
      return new PPCFastISel(FuncInfo, LibInfo);
     return nullptr;
   }
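The comment block in PPCMaterializeGV above describes two shapes; written out as pseudo machine IR they are (SSA names illustrative):

    //   Indirect (interposable symbol, or large code model):
    //     %hi   = ADDIStocHA8 %x2, @gv
    //     %addr = LDtocL @gv, %hi          // load the address from the TOC entry
    //
    //   Direct (locally defined, small code model):
    //     %hi   = ADDIStocHA8 %x2, @gv
    //     %addr = ADDItocL %hi, @gv        // compute the address arithmetically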
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index ebfb1ef7f49b..06a4d183e781 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -47,13 +47,15 @@ static const MCPhysReg VRRegNo[] = {
 };
 
 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
-  if (STI.isDarwinABI())
+  if (STI.isDarwinABI() || STI.isAIXABI())
     return STI.isPPC64() ? 16 : 8;
   // SVR4 ABI:
   return STI.isPPC64() ? 16 : 4;
 }
 
 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
+  if (STI.isAIXABI())
+    return STI.isPPC64() ? 40 : 20;
   return STI.isELFv2ABI() ? 24 : 40;
 }
 
@@ -88,6 +90,11 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
 }
 
+static unsigned computeCRSaveOffset() {
+  // The condition register save offset needs to be updated for AIX PPC32.
+  return 8;
+}
+
 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                           STI.getPlatformStackAlignment(), 0),
@@ -95,7 +102,8 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
       LinkageSize(computeLinkageSize(Subtarget)),
-      BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
+      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
+      CRSaveOffset(computeCRSaveOffset()) {}
 
 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
@@ -370,8 +378,8 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
     return;
   }
 
-  unsigned SrcReg = MI.getOperand(1).getReg();
-  unsigned DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register DstReg = MI.getOperand(0).getReg();
 
   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
     if (DstReg != SrcReg)
@@ -781,15 +789,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   bool isPPC64 = Subtarget.isPPC64();
   // Get the ABI.
   bool isSVR4ABI = Subtarget.isSVR4ABI();
+  bool isAIXABI = Subtarget.isAIXABI();
   bool isELFv2ABI = Subtarget.isELFv2ABI();
-  assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
-         "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+  assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
+         "Unsupported PPC ABI.");
 
   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
   // process it.
   if (!isSVR4ABI)
     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+        if (isAIXABI)
+          report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
         HandleVRSaveUpdate(*MBBI, TII);
         break;
       }
@@ -819,7 +830,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   bool HasRedZone = isPPC64 || !isSVR4ABI;
 
   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
-  unsigned BPReg = RegInfo->getBaseRegister(MF);
+  Register BPReg = RegInfo->getBaseRegister(MF);
   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
@@ -908,6 +919,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   assert((isPPC64 || !MustSaveCR) &&
         "Prologue CR saving supported only in 64-bit mode");
 
+  if (MustSaveCR && isAIXABI)
+    report_fatal_error("Prologue CR saving is unimplemented on AIX.");
+
   // Check if we can move the stack update instruction (stdu) down the prologue
   // past the callee saves. Hopefully this will avoid the situation where the
   // saves are waiting for the update on the store with update to complete.
@@ -966,7 +980,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
       MIB.addReg(MustSaveCRs[i], CrState);
     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
       .addReg(TempReg, getKillRegState(true))
-      .addImm(8)
+      .addImm(getCRSaveOffset())
       .addReg(SPReg);
   }
 
@@ -1020,7 +1034,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
     assert(HasRedZone && "A red zone is always available on PPC64");
     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
       .addReg(TempReg, getKillRegState(true))
-      .addImm(8)
+      .addImm(getCRSaveOffset())
       .addReg(SPReg);
   }
 
@@ -1324,7 +1338,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
       // actually saved gets its own CFI record.
       unsigned CRReg = isELFv2ABI ? Reg : (unsigned) PPC::CR2;
       unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-          nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+          nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex);
       continue;
@@ -1387,7 +1401,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
 
   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
-  unsigned BPReg = RegInfo->getBaseRegister(MF);
+  Register BPReg = RegInfo->getBaseRegister(MF);
   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
   unsigned ScratchReg = 0;
   unsigned TempReg    = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -1590,7 +1604,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
     // is live here.
     assert(HasRedZone && "Expecting red zone");
     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
-      .addImm(8)
+      .addImm(getCRSaveOffset())
      .addReg(SPReg);
     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
@@ -1614,7 +1628,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
     assert(isPPC64 && "Expecting 64-bit mode");
     assert(RBReg == SPReg && "Should be using SP as a base register");
     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
-      .addImm(8)
+      .addImm(getCRSaveOffset())
       .addReg(RBReg);
   }
 
@@ -1762,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   // Save R31 if necessary
   int FPSI = FI->getFramePointerSaveIndex();
-  bool isPPC64 = Subtarget.isPPC64();
-  bool isDarwinABI = Subtarget.isDarwinABI();
+  const bool isPPC64 = Subtarget.isPPC64();
+  const bool IsDarwinABI = Subtarget.isDarwinABI();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
   // If the frame pointer save index hasn't been defined yet.
@@ -1812,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
   // function uses CR 2, 3, or 4.
-  if (!isPPC64 && !isDarwinABI &&
+  if (!isPPC64 && !IsDarwinABI &&
      (SavedRegs.test(PPC::CR2) ||
        SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
@@ -1872,8 +1886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
             (Reg != PPC::X2 && Reg != PPC::R2)) &&
            "Not expecting to try to spill R2 in a function that must save TOC");
-    if (PPC::GPRCRegClass.contains(Reg) ||
-        PPC::SPE4RCRegClass.contains(Reg)) {
+    if (PPC::GPRCRegClass.contains(Reg)) {
       HasGPSaveArea = true;
 
       GPRegs.push_back(CSI[i]);
@@ -1967,7 +1980,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
     assert(FI && "No Base Pointer Save Slot!");
     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
 
-    unsigned BP = RegInfo->getBaseRegister(MF);
+    Register BP = RegInfo->getBaseRegister(MF);
     if (PPC::G8RCRegClass.contains(BP)) {
       MinG8R = std::min<unsigned>(MinG8R, BP);
       HasG8SaveArea = true;
@@ -2428,6 +2441,26 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
+ report_fatal_error("TOC save is not implemented on AIX yet."); + return TOCSaveOffset; +} + +unsigned PPCFrameLowering::getFramePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("FramePointer is not implemented on AIX yet."); + return FramePointerSaveOffset; +} + +unsigned PPCFrameLowering::getBasePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("BasePointer is not implemented on AIX yet."); + return BasePointerSaveOffset; +} + bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) return false; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d116e9fd22e1..a5fbc9acbb28 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -26,6 +26,7 @@ class PPCFrameLowering: public TargetFrameLowering { const unsigned FramePointerSaveOffset; const unsigned LinkageSize; const unsigned BasePointerSaveOffset; + const unsigned CRSaveOffset; /** * Find register[s] that can be used in function prologue and epilogue @@ -142,15 +143,19 @@ public: /// getTOCSaveOffset - Return the previous frame offset to save the /// TOC register -- 64-bit SVR4 ABI only. - unsigned getTOCSaveOffset() const { return TOCSaveOffset; } + unsigned getTOCSaveOffset() const; /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; } + unsigned getFramePointerSaveOffset() const; /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; } + unsigned getBasePointerSaveOffset() const; + + /// getCRSaveOffset - Return the previous frame offset to save the + /// CR register. + unsigned getCRSaveOffset() const { return CRSaveOffset; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 543cac075f55..4ad6c88233fe 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -371,7 +371,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // by the scheduler. Detect them now. bool HasVectorVReg = false; for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { HasVectorVReg = true; break; @@ -391,8 +391,8 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Create two vregs - one to hold the VRSAVE register that is live-in to the // function and one for the value after having bits or'd into it. 
- unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); @@ -447,7 +447,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } else { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); - unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); @@ -5065,52 +5065,95 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && - "Only supported for 64-bit ABI and 32-bit SVR4"); - if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { - SDValue GA = N->getOperand(0); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; - } + const bool isPPC64 = PPCSubTarget->isPPC64(); + const bool isELFABI = PPCSubTarget->isSVR4ABI(); + const bool isAIXABI = PPCSubTarget->isAIXABI(); + + assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + + // PowerPC only supports the small, medium, and large code models. + const CodeModel::Model CModel = TM.getCodeModel(); + assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && + "PowerPC doesn't support tiny or kernel code models."); - // For medium and large code model, we generate two instructions as - // described below. Otherwise we allow SelectCodeCommon to handle this, + if (isAIXABI && CModel == CodeModel::Medium) + report_fatal_error("Medium code model is not supported on AIX."); + + // For 64-bit small code model, we allow SelectCodeCommon to handle this, // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - CodeModel::Model CModel = TM.getCodeModel(); - if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + if (isPPC64 && CModel == CodeModel::Small) break; - // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(@sym, ADDIStocHA(%x2, @sym)) + // Handle 32-bit small code model. + if (!isPPC64) { + // Transforms the ISD::TOC_ENTRY node to a PPC::LWZtoc machine node. + auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + + if (isELFABI) { + assert(TM.isPositionIndependent() && + "32-bit ELF can only have TOC entries in position independent" + " code."); + // 32-bit ELF always uses a small code model toc access.
+ replaceWithLWZtoc(N); + return; + } + + if (isAIXABI && CModel == CodeModel::Small) { + replaceWithLWZtoc(N); + return; + } + } + + assert(CModel != CodeModel::Small && "All small code models handled."); + + assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" + " ELF/AIX or 32-bit AIX in the following."); + + // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode + // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We + // generate two instructions as described below. The first source operand + // is a symbol reference. If it must be toc-referenced according to + // PPCSubTarget, we generate: + // [32-bit AIX] + // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) + // [64-bit ELF/AIX] + // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, @sym), @sym) + // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); - SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, - TOCbase, GA); + + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; + SDNode *Tmp = CurDAG->getMachineNode( + isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); + if (PPCLowering->isAccessedAsGotIndirect(GA)) { - // If it is access as got-indirect, we need an extra LD to load + // If it is accessed as got-indirect, we need an extra LWZ/LD to load // the address. - SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + SDNode *MN = CurDAG->getMachineNode( + isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); + transferMemOperands(N, MN); ReplaceNode(N, MN); return; } - // Build the address relative to the TOC-pointer.. + // Build the address relative to the TOC-pointer. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. - assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && - "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); + assert(PPCSubTarget->is32BitELFABI() && + "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); @@ -6456,7 +6499,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { continue; if (!HBase.isMachineOpcode() || - HBase.getMachineOpcode() != PPC::ADDIStocHA) + HBase.getMachineOpcode() != PPC::ADDIStocHA8) continue; if (!Base.hasOneUse() || !HBase.hasOneUse()) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 24d50074860d..8cf6a660b08b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -139,13 +139,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); - setMinStackArgumentAlignment(isPPC64 ? 8:4); + setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4)); // Set up the register classes. 
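To summarize the TOC_ENTRY selection above, here is a standalone model of the opcode choice per word size, ABI, and code model. The function and enum are illustrative; the instruction names are the pseudo-ops the patch emits, and the sketch assumes, as the isAccessedAsGotIndirect change later in this patch states, that AIX treats every symbol as TOC-indirect.

#include <cstdio>
#include <string>

enum class CM { Small, Medium, Large };

static std::string tocSequence(bool IsPPC64, bool IsAIX, CM CModel,
                               bool GotIndirect) {
  if (IsAIX && CModel == CM::Medium)
    return "fatal: medium code model not supported on AIX";
  if (IsPPC64 && CModel == CM::Small)
    return "LDtoc/LDtocJTI/LDtocCPT/LDtocBA"; // left to SelectCodeCommon
  if (!IsPPC64 && (!IsAIX || CModel == CM::Small))
    return "LWZtoc";                          // 32-bit small code model
  // 64-bit medium/large, or 32-bit AIX large: hi adjust, then lo access.
  std::string Hi = IsPPC64 ? "ADDIStocHA8" : "ADDIStocHA";
  if (GotIndirect)                            // always true on AIX
    return Hi + " + " + (IsPPC64 ? "LDtocL" : "LWZtocL");
  return Hi + " + ADDItocL";
}

int main() {
  std::printf("%s\n", tocSequence(true, false, CM::Medium, false).c_str());
  std::printf("%s\n", tocSequence(false, true, CM::Large, true).c_str());
  return 0;
}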
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { if (hasSPE()) { - addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); + addRegisterClass(MVT::f32, &PPC::GPRCRegClass); addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); @@ -431,28 +431,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (Subtarget.isSVR4ABI()) { - if (isPPC64) { - // VAARG always uses double-word chunks, so promote anything smaller. - setOperationAction(ISD::VAARG, MVT::i1, Promote); - AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i8, Promote); - AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i16, Promote); - AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i32, Promote); - AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - } else { - // VAARG is custom lowered with the 32-bit SVR4 ABI. - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); - } + if (Subtarget.is64BitELFABI()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else if (Subtarget.is32BitELFABI()) { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); } else setOperationAction(ISD::VAARG, MVT::Other, Expand); - if (Subtarget.isSVR4ABI() && !isPPC64) - // VACOPY is custom lowered with the 32-bit SVR4 ABI. + // VACOPY is custom lowered with the 32-bit SVR4 ABI. + if (Subtarget.is32BitELFABI()) setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); @@ -553,17 +551,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // For v2i64, these are only valid with P8Vector. This is corrected after // the loop. 
- setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); + if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + } + else { + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + } if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM, VT, Legal); @@ -646,7 +652,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -944,7 +950,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); - setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::FNEG , MVT::v4f64, Legal); @@ -1118,6 +1123,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); @@ -1172,9 +1179,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setJumpIsExpensive(); } - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); if (Subtarget.isDarwin()) - setPrefFunctionAlignment(4); + setPrefFunctionAlignment(Align(16)); switch (Subtarget.getDarwinDirective()) { default: break; @@ -1191,8 +1198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: - setPrefFunctionAlignment(4); - setPrefLoopAlignment(4); + setPrefLoopAlignment(Align(16)); + setPrefFunctionAlignment(Align(16)); break; } @@ -1352,6 +1359,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; + case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; + case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -1396,7 +1405,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; + case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; + case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; } return nullptr; } @@ -1517,7 +1527,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = - 
static_cast<const PPCSubtarget&>(DAG.getSubtarget()); + static_cast<const PPCSubtarget&>(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; @@ -1769,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to -/// VSPLTB/VSPLTH/VSPLTW. +/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && - (EltSize == 1 || EltSize == 2 || EltSize == 4)); + assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && + EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. @@ -2065,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, } -/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the -/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, - SelectionDAG &DAG) { +/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is +/// appropriate for PPC mnemonics (which have a big endian bias - namely +/// elements are counted from the left of the vector register). +unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) @@ -2667,12 +2678,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } -static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, - SDValue GA) { +SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, + SDValue GA) const { + const bool Is64Bit = Subtarget.isPPC64(); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; - SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : - DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); - + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) + : Subtarget.isAIXABI() + ? DAG.getRegister(PPC::R2, VT) + : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, @@ -2688,10 +2701,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return getTOCEntry(DAG, SDLoc(CP), true, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2701,7 +2714,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(CP), false, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } SDValue CPIHi = @@ -2764,10 +2777,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
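The rename of getVSPLTImmediate to getSplatIdxForPPCMnemonics above is about the big-endian bias of the PPC splat mnemonics: they count elements from the left of the register, so a little-endian shuffle index must be mirrored. A standalone sketch follows; the mirroring formula is my reading of that comment, not code copied from the patch.

#include <cassert>

// MaskElt is the first byte index the v16i8-typed shuffle mask selects.
static unsigned splatIdxForMnemonics(unsigned MaskElt, unsigned EltSize,
                                     bool IsLittleEndian) {
  unsigned NumElts = 16 / EltSize;  // elements per 16-byte register
  unsigned Idx = MaskElt / EltSize; // element the mask points at
  return IsLittleEndian ? NumElts - 1 - Idx : Idx;
}

int main() {
  // Splat of word element 1 (mask bytes 4..7): vspltw wants 1 on BE,
  // but 2 on LE, because the mnemonic counts from the other end.
  assert(splatIdxForMnemonics(4, 4, /*IsLittleEndian=*/false) == 1);
  assert(splatIdxForMnemonics(4, 4, /*IsLittleEndian=*/true) == 2);
  return 0;
}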
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return getTOCEntry(DAG, SDLoc(JT), true, GA); + return getTOCEntry(DAG, SDLoc(JT), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2777,7 +2790,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(GA), false, GA); + return getTOCEntry(DAG, SDLoc(GA), GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -2793,14 +2806,18 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. - if (Subtarget.isSVR4ABI() && - (Subtarget.isPPC64() || isPositionIndependent())) { - if (Subtarget.isPPC64()) - setUsesTOCBasePtr(DAG); + if (Subtarget.is64BitELFABI()) { + setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); + return getTOCEntry(DAG, SDLoc(BASDN), GA); } + // 32-bit position-independent ELF stores the BlockAddress in the .got. + if (Subtarget.is32BitELFABI() && isPositionIndependent()) + return getTOCEntry( + DAG, SDLoc(BASDN), + DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset())); + unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); @@ -2913,12 +2930,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return getTOCEntry(DAG, DL, true, GA); + return getTOCEntry(DAG, DL, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2929,7 +2946,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, DL, false, GA); + return getTOCEntry(DAG, DL, GA); } SDValue GAHi = @@ -3235,8 +3252,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV, nextOffset)); } -/// FPR - The set of FP registers that should be allocated for arguments, -/// on Darwin. +/// FPR - The set of FP registers that should be allocated for arguments +/// on Darwin and AIX. 
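The getTOCEntry refactoring at the start of this stretch folds the TOC base-register choice into one place instead of passing Is64Bit at every call site. A minimal model of the decision it now makes, with string results standing in for the register operands:

#include <cstdio>

static const char *tocBaseRegister(bool Is64Bit, bool IsAIX) {
  if (Is64Bit)
    return "X2";                          // 64-bit: TOC pointer lives in X2
  return IsAIX ? "R2"                     // 32-bit AIX: TOC pointer in R2
               : "PPCISD::GlobalBaseReg"; // 32-bit ELF PIC: synthesized base
}

int main() {
  std::printf("64-bit:         %s\n", tocBaseRegister(true, false));
  std::printf("32-bit AIX:     %s\n", tocBaseRegister(false, true));
  std::printf("32-bit ELF PIC: %s\n", tocBaseRegister(false, false));
  return 0;
}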
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; @@ -3377,17 +3394,17 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - if (Subtarget.isSVR4ABI()) { - if (Subtarget.isPPC64()) - return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - else - return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } else { - return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } + if (Subtarget.is64BitELFABI()) + return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + else if (Subtarget.is32BitELFABI()) + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + + // FIXME: We are using this for both AIX and Darwin. We should add appropriate + // AIX testing, and rename it appropriately. + return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( @@ -3467,7 +3484,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else if (Subtarget.hasSPE()) - RC = &PPC::SPE4RCRegClass; + RC = &PPC::GPRCRegClass; else RC = &PPC::F4RCRegClass; break; @@ -4516,7 +4533,7 @@ callsShareTOCBase(const Function *Caller, SDValue Callee, static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl<ISD::OutputArg> &Outs) { - assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); + assert(Subtarget.is64BitELFABI()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); @@ -4926,7 +4943,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, ImmutableCallSite CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); bool isAIXABI = Subtarget.isAIXABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); @@ -4997,7 +5014,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64 && !isELFv2ABI) { + if (is64BitELFv1ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -5085,7 +5102,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) + if (is64BitELFv1ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -6730,8 +6747,12 @@ SDValue PPCTargetLowering::LowerCall_AIX( const unsigned NumGPRs = isPPC64 ? 
array_lengthof(GPR_64) : array_lengthof(GPR_32); + const unsigned NumFPRs = array_lengthof(FPR); + assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing " "on AIX"); + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; - unsigned GPR_idx = 0; + unsigned GPR_idx = 0, FPR_idx = 0; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; @@ -6768,6 +6789,20 @@ SDValue PPCTargetLowering::LowerCall_AIX( break; case MVT::f32: case MVT::f64: + if (FPR_idx != NumFPRs) { + RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + + // If we have any FPRs remaining, we may also have GPRs remaining. + // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available + // GPRs. + if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) + ++GPR_idx; + } else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: @@ -8152,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { Op0.getOperand(1)); } +static const SDValue *getNormalLoadInput(const SDValue &Op) { + const SDValue *InputLoad = &Op; + if (InputLoad->getOpcode() == ISD::BITCAST) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() != ISD::LOAD) + return nullptr; + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + return ISD::isNormalLoad(LD) ? InputLoad : nullptr; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -8274,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) { + + const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); + // Handle load-and-splat patterns as we have instructions that will do this + // in one go. + if (InputLoad && DAG.isSplatValue(Op, true)) { + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + + // We have handling for 4 and 8 byte elements. + unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); + + // Checking for a single use of this load, we have to check for vector + // width (128 bits) / ElementSize uses (since each operand of the + // BUILD_VECTOR is a separate use of the value). + if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) && + ((Subtarget.hasVSX() && ElementSize == 64) || + (Subtarget.hasP9Vector() && ElementSize == 32))) { + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + DAG.getValueType(Op.getValueType()) // VT + }; + return + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, + DAG.getVTList(Op.getValueType(), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + } + } + // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be // lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. @@ -8759,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned ShiftElts, InsertAtByte; bool Swap = false; + + // If this is a load-and-splat, we can do that with a single instruction + // in some cases.
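The f32/f64 case added to LowerCall_AIX above encodes the AIX convention that floating-point arguments also consume general-purpose argument registers: each FPR-passed argument shadows one GPR, and an f64 shadows two in 32-bit mode. Below is a standalone model of that accounting; the register counts are assumptions (13 FPRs per the assert, 8 GPRs for r3 through r10), and the stack-slot path is the part the patch still leaves as report_fatal_error.

#include <cassert>

struct ArgRegState {
  unsigned GPRIdx = 0, FPRIdx = 0;
  static const unsigned NumGPRs = 8, NumFPRs = 13;

  // Returns false where the patch would report_fatal_error (stack case).
  bool allocateFloat(bool IsF64, bool IsPPC64) {
    if (FPRIdx == NumFPRs)
      return false; // would need a stack slot
    ++FPRIdx;
    if (GPRIdx != NumGPRs)
      ++GPRIdx;     // every FPR argument shadows one GPR...
    if (GPRIdx != NumGPRs && IsF64 && !IsPPC64)
      ++GPRIdx;     // ...and f64 shadows a second GPR on 32-bit AIX
    return true;
  }
};

int main() {
  ArgRegState S;
  assert(S.allocateFloat(/*IsF64=*/true, /*IsPPC64=*/false));
  assert(S.FPRIdx == 1 && S.GPRIdx == 2); // one FPR, two shadowed GPRs
  assert(S.allocateFloat(/*IsF64=*/false, /*IsPPC64=*/false));
  assert(S.FPRIdx == 2 && S.GPRIdx == 3);
  return 0;
}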
However if the load has multiple uses, we don't want to + // combine it because that will just produce multiple loads. + const SDValue *InputLoad = getNormalLoadInput(V1); + if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && + (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) && + InputLoad->hasOneUse()) { + bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4); + int SplatIdx = + PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG); + + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + // For 4-byte load-and-splat, we need Power9. + if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { + uint64_t Offset = 0; + if (IsFourByte) + Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; + else + Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; + SDValue BasePtr = LD->getBasePtr(); + if (Offset != 0) + BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + BasePtr, DAG.getIntPtrConstant(Offset, dl)); + SDValue Ops[] = { + LD->getChain(), // Chain + BasePtr, // BasePtr + DAG.getValueType(Op.getValueType()) // VT + }; + SDVTList VTL = + DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); + SDValue LdSplt = + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, + Ops, LD->getMemoryVT(), LD->getMemOperand()); + if (LdSplt.getValueType() != SVOp->getValueType(0)) + LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); + return LdSplt; + } + } if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { @@ -8835,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { - int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, @@ -9880,6 +9994,30 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { switch (Op0.getOpcode()) { default: return SDValue(); + case ISD::EXTRACT_SUBVECTOR: { + assert(Op0.getNumOperands() == 2 && + isa<ConstantSDNode>(Op0->getOperand(1)) && + "Node should have 2 operands with second one being a constant!"); + + if (Op0.getOperand(0).getValueType() != MVT::v4f32) + return SDValue(); + + // Custom lower is only done for high or low doubleword. + int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); + if (Idx % 2 != 0) + return SDValue(); + + // Since input is v4f32, at this point Idx is either 0 or 2. + // Shift to get the doubleword position we want. + int DWord = Idx >> 1; + + // High and low word positions are different on little endian. + if (Subtarget.isLittleEndian()) + DWord ^= 0x1; + + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, + Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32)); + } case ISD::FADD: case ISD::FMUL: case ISD::FSUB: { @@ -9891,26 +10029,25 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Generate new load node. 
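The byte-offset arithmetic in the load-and-splat shuffle hunk above is worth spelling out: the splat index carries the left-to-right PPC bias, so on little-endian targets it is mirrored before scaling to a byte offset. A standalone check of both element sizes, directly restating the Offset expressions from the hunk:

#include <cassert>
#include <cstdint>

static uint64_t splatLoadOffset(int SplatIdx, bool IsFourByte,
                                bool IsLittleEndian) {
  if (IsFourByte)
    return IsLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
  return IsLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
}

int main() {
  // Splatting word 3 of a v4i32 on LE reads the word at offset 0...
  assert(splatLoadOffset(3, true, true) == 0);
  // ...while on BE it reads the word at offset 12.
  assert(splatLoadOffset(3, true, false) == 12);
  // Doubleword splats have only two candidate offsets, 0 and 8.
  assert(splatLoadOffset(1, false, true) == 0);
  assert(splatLoadOffset(0, false, true) == 8);
  return 0;
}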
LoadSDNode *LD = cast<LoadSDNode>(LdOp); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - NewLoad[i] = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), - LD->getMemOperand()); - } - SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, - NewLoad[0], NewLoad[1], - Op0.getNode()->getFlags()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + NewLoad[i] = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); + } + SDValue NewOp = + DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0], + NewLoad[1], Op0.getNode()->getFlags()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp, + DAG.getConstant(0, dl, MVT::i32)); } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op0); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - SDValue NewLd = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), LD->getMemOperand()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + SDValue NewLd = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd, + DAG.getConstant(0, dl, MVT::i32)); } } llvm_unreachable("ERROR: Should return for all cases within switch."); @@ -10048,9 +10185,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::TRUNCATE: { EVT TrgVT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); if (TrgVT.isVector() && isOperationCustom(N->getOpcode(), TrgVT) && - N->getOperand(0).getValueType().getSizeInBits() <= 128 + OpVT.getSizeInBits() <= 128 && + isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits())) Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); return; } @@ -10192,7 +10331,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, if (CmpOpcode) { // Signed comparisons of byte or halfword values must be sign-extended. if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { - unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg).addReg(dest); BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) @@ -10243,10 +10382,10 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned incr = MI.getOperand(3).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -10364,7 +10503,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend.
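The EXTRACT_SUBVECTOR case in LowerFP_EXTEND above boils down to picking which doubleword of the v4f32 source feeds FP_EXTEND_HALF: element index 0 or 2 maps to doubleword 0 or 1, and little-endian registers swap which physical half that is. A standalone model of just that index computation:

#include <cassert>

// Returns the FP_EXTEND_HALF operand (0 = upper half, 1 = lower half) for
// an extract of v2f32 at element Idx from a v4f32, or -1 if not handled.
static int extendHalfDWord(int Idx, bool IsLittleEndian) {
  if (Idx != 0 && Idx != 2)
    return -1;          // only doubleword-aligned extracts are custom-lowered
  int DWord = Idx >> 1; // 0 -> 0, 2 -> 1
  if (IsLittleEndian)
    DWord ^= 0x1;       // halves are swapped in LE registers
  return DWord;
}

int main() {
  assert(extendHalfDWord(0, false) == 0);
  assert(extendHalfDWord(2, false) == 1);
  assert(extendHalfDWord(0, true) == 1);
  assert(extendHalfDWord(1, true) == -1); // odd index: falls back to default
  return 0;
}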
- unsigned SReg = RegInfo.createVirtualRegister(GPRC); + Register SReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::AND), SReg) .addReg(TmpDestReg) .addReg(MaskReg); @@ -10375,7 +10514,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) .addReg(SReg) .addReg(ShiftReg); - unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC); + Register ValueSReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) .addReg(ValueReg); ValueReg = ValueSReg; @@ -10426,11 +10565,11 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned restoreDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && @@ -10482,10 +10621,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); - unsigned LabelReg = MRI.createVirtualRegister(PtrRC); - unsigned BufReg = MI.getOperand(1).getReg(); + Register LabelReg = MRI.createVirtualRegister(PtrRC); + Register BufReg = MI.getOperand(1).getReg(); - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) @@ -10570,7 +10709,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; @@ -10587,7 +10726,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); - unsigned BufReg = MI.getOperand(0).getReg(); + Register BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored @@ -10662,7 +10801,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && + if (Subtarget.is64BitELFABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. 
It can't however, because there is no @@ -10828,15 +10967,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); + Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); - unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg) @@ -10978,11 +11117,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, StoreMnemonic = PPC::STDCX; break; } - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11057,11 +11196,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, bool isLittleEndian = Subtarget.isLittleEndian(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11238,13 +11377,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src1 = MI.getOperand(1).getReg(); - unsigned Src2 = MI.getOperand(2).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); @@ -11270,7 +11409,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned Dest = RegInfo.createVirtualRegister( + Register Dest = RegInfo.createVirtualRegister( Opcode == PPC::ANDIo ? 
&PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); @@ -11283,7 +11422,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) @@ -11297,7 +11436,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(PPC::CR0EQ); } else if (MI.getOpcode() == PPC::SETRNDi) { DebugLoc dl = MI.getDebugLoc(); - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11378,7 +11517,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } }; - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11393,12 +11532,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // mtfsf 255, NewFPSCRReg MachineOperand SrcOp = MI.getOperand(1); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); - unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); // The first operand of INSERT_SUBREG should be a register which has // subregisters, we only care about its RegClass, so we should use an @@ -11409,14 +11548,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .add(SrcOp) .addImm(1); - unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) .addReg(OldFPSCRTmpReg) .addReg(ExtSrcReg) .addImm(0) .addImm(62); - unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63 @@ -13113,6 +13252,61 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, return Val; } +SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, + LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const { + assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) && + "Not a reverse memop pattern!"); + + auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { + auto Mask = SVN->getMask(); + int i = 0; + auto I = Mask.rbegin(); + auto E = Mask.rend(); + + for (; I != E; ++I) { + if (*I != i) + return false; + i++; + } + return true; + }; + + SelectionDAG &DAG = DCI.DAG; + EVT VT = SVN->getValueType(0); + + if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) + return SDValue(); + + // Before P9, we have PPCVSXSwapRemoval pass to hack the element 
order. + // See comment in PPCVSXSwapRemoval.cpp. + // It is conflict with PPCVSXSwapRemoval opt. So we don't do it. + if (!Subtarget.hasP9Vector()) + return SDValue(); + + if(!IsElementReverse(SVN)) + return SDValue(); + + if (LSBase->getOpcode() == ISD::LOAD) { + SDLoc dl(SVN); + SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + if (LSBase->getOpcode() == ISD::STORE) { + SDLoc dl(LSBase); + SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), + LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + llvm_unreachable("Expected a load or store node here"); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -13159,6 +13353,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); + case ISD::VECTOR_SHUFFLE: + if (ISD::isNormalLoad(N->getOperand(0).getNode())) { + LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0)); + return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI); + } + break; case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); @@ -13170,6 +13370,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return Val; } + if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1)); + SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI); + if (Val) + return Val; + } + // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && @@ -13903,7 +14110,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } } -unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { +Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: @@ -13924,7 +14131,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // Actual alignment of the loop will depend on the hotness check and other // logic in alignBlocks. if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) - return 5; + return Align(32); } const PPCInstrInfo *TII = Subtarget.getInstrInfo(); @@ -13940,7 +14147,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { } if (LoopSize > 16 && LoopSize <= 32) - return 5; + return Align(32); break; } @@ -14063,7 +14270,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'f': if (Subtarget.hasSPE()) { if (VT == MVT::f32 || VT == MVT::i32) - return std::make_pair(0U, &PPC::SPE4RCRegClass); + return std::make_pair(0U, &PPC::GPRCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::SPERCRegClass); } else { @@ -14306,22 +14513,22 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + bool IsDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; - unsigned Reg = StringSwitch<unsigned>(RegName) + Register Reg = StringSwitch<Register>(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) - .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) - .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : + .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2) + .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() : (is64Bit ? PPC::X13 : PPC::R13)) - .Default(0); + .Default(Register()); if (Reg) return Reg; @@ -14330,14 +14537,17 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { // The 32-bit SVR4 ABI accesses everything as got-indirect. - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (Subtarget.is32BitELFABI()) + return true; + + // AIX accesses everything indirectly through the TOC, which is similar to + // the GOT. + if (Subtarget.isAIXABI()) return true; CodeModel::Model CModel = getTargetMachine().getCodeModel(); // In the small and large code models, module locals are accessed - // indirectly by loading their address from .toc/.got. The difference - // is that for large code model we have ADDISTocHa + LDtocL and for - // small code model we simply have LDtoc. + // indirectly by loading their address from .toc/.got. if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; @@ -14345,14 +14555,8 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA)) return true; - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { - const GlobalValue *GV = G->getGlobal(); - unsigned char GVFlags = Subtarget.classifyGlobalReference(GV); - // The NLP flag indicates that a global access has to use an - // extra indirection.
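The getRegisterByName change above keeps the existing reservation table and only retypes it around Register: r1 always resolves, r2 resolves only on 32-bit non-Darwin targets (elsewhere it is the TOC pointer), and r13 everywhere except 32-bit Darwin. A standalone model, with strings standing in for Register values and the empty string for an invalid Register():

#include <cstdio>
#include <cstring>

static const char *namedRegister(const char *Name, bool IsPPC64,
                                 bool Is64BitVT, bool IsDarwinABI) {
  bool Is64 = IsPPC64 && Is64BitVT;
  if (!std::strcmp(Name, "r1"))
    return Is64 ? "X1" : "R1";
  if (!std::strcmp(Name, "r2"))
    return (IsDarwinABI || IsPPC64) ? "" : "R2";
  if (!std::strcmp(Name, "r13"))
    return (!IsPPC64 && IsDarwinABI) ? "" : (Is64 ? "X13" : "R13");
  return ""; // unknown names fall through to the frame/base register table
}

int main() {
  std::printf("r2 on 32-bit SVR4: %s\n",
              namedRegister("r2", false, false, false)); // R2
  std::printf("r2 on PPC64: '%s'\n",
              namedRegister("r2", true, false, false));  // invalid
  std::printf("r13 on PPC64/i64: %s\n",
              namedRegister("r13", true, true, false));  // X13
  return 0;
}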
-    if (GVFlags & PPCII::MO_NLP_FLAG)
-      return true;
-  }
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
+    return Subtarget.isGVIndirectSymbol(G->getGlobal());
   return false;
 }
@@ -14417,7 +14621,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = -VT.getStoreSize()+1;
     Info.size = 2*VT.getStoreSize()-1;
-    Info.align = 1;
+    Info.align = Align::None();
     Info.flags = MachineMemOperand::MOLoad;
     return true;
   }
@@ -14451,7 +14655,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.size = VT.getStoreSize();
-    Info.align = 1;
+    Info.align = Align::None();
     Info.flags = MachineMemOperand::MOLoad;
     return true;
   }
@@ -14503,7 +14707,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = -VT.getStoreSize()+1;
     Info.size = 2*VT.getStoreSize()-1;
-    Info.align = 1;
+    Info.align = Align::None();
     Info.flags = MachineMemOperand::MOStore;
     return true;
   }
@@ -14536,7 +14740,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
     Info.size = VT.getStoreSize();
-    Info.align = 1;
+    Info.align = Align::None();
     Info.flags = MachineMemOperand::MOStore;
     return true;
   }
@@ -14786,7 +14990,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
     else
       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
 
-    unsigned NewVR = MRI->createVirtualRegister(RC);
+    Register NewVR = MRI->createVirtualRegister(RC);
     // Create copy from CSR to a virtual register.
     // FIXME: this currently does not emit CFI pseudo-instructions, it works
     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
@@ -15146,7 +15350,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
 
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
-  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
+  if (!Subtarget.is64BitELFABI())
     return false;
 
   // If not a tail call then no need to proceed.
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 97422c6eda36..62922ea2d4c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -412,8 +412,9 @@ namespace llvm {
       /// representation.
       QBFLT,
 
-      /// Custom extend v4f32 to v2f64.
-      FP_EXTEND_LH,
+      /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend the upper (IDX=0) half
+      /// or the lower (IDX=1) half of v4f32 to v2f64.
+      FP_EXTEND_HALF,
 
       /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
       /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
@@ -456,15 +457,29 @@ namespace llvm {
       /// an xxswapd.
       LXVD2X,
 
+      /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+      /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x, depending on
+      /// the vector type, to load the vector in big-endian element order.
+      LOAD_VEC_BE,
+
       /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
       /// v2f32 value into the lower half of a VSR register.
       LD_VSX_LH,
 
+      /// VSRC, CHAIN = LD_SPLAT CHAIN, Ptr - a splatting load memory
+      /// instruction, such as LXVDSX or LXVWSX.
+      LD_SPLAT,
+
       /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
       /// Maps directly to an stxvd2x instruction that will be preceded by
       /// an xxswapd.
       STXVD2X,
 
+      /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+      /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x, depending on
+      /// the vector type, to store the vector in big-endian element order.
+      STORE_VEC_BE,
+
       /// Store scalar integers from VSR.
       ST_VSR_SCAL_INT,
 
@@ -563,9 +578,11 @@ namespace llvm {
     bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE);
 
-    /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
-    /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-    unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
+    /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+    /// appropriate for PPC mnemonics (which have a big endian bias - namely
+    /// elements are counted from the left of the vector register).
+    unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+                                        SelectionDAG &DAG);
 
     /// get_VSPLTI_elt - If this is a build_vector of constants which can be
     /// formed by using a vspltis[bhw] instruction of the specified element
@@ -716,8 +733,8 @@ namespace llvm {
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                           SmallVectorImpl<SDNode *> &Created) const override;
 
-    unsigned getRegisterByName(const char* RegName, EVT VT,
-                               SelectionDAG &DAG) const override;
+    Register getRegisterByName(const char* RegName, EVT VT,
+                               const MachineFunction &MF) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
                                        KnownBits &Known,
@@ -725,7 +742,7 @@ namespace llvm {
                                        const SelectionDAG &DAG,
                                        unsigned Depth = 0) const override;
 
-    unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+    Align getPrefLoopAlignment(MachineLoop *ML) const override;
 
     bool shouldInsertFencesForAtomic(const Instruction *I) const override {
       return true;
     }
@@ -834,6 +851,18 @@ namespace llvm {
       return true;
     }
 
+    bool isDesirableToTransformToIntegerOp(unsigned Opc,
+                                           EVT VT) const override {
+      // Only handle float load/store pair because float(fpr) load/store
+      // instructions have more cycles than integer(gpr) load/store in PPC.
+      if (Opc != ISD::LOAD && Opc != ISD::STORE)
+        return false;
+      if (VT != MVT::f32 && VT != MVT::f64)
+        return false;
+
+      return true;
+    }
+
     // Returns true if the address of the global is stored in TOC entry.
     bool isAccessedAsGotIndirect(SDValue N) const;
 
@@ -998,6 +1027,8 @@ namespace llvm {
                             SDValue &FPOpOut,
                             const SDLoc &dl) const;
 
+    SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;
+
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
@@ -1155,6 +1186,8 @@ namespace llvm {
    SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
+                                 DAGCombinerInfo &DCI) const;
 
    /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
    /// SETCC with integer subtraction when (1) there is a legal way of doing it
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index d598567f8e4e..f16187149d36 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1099,8 +1099,8 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
 
 // Support for medium and large code model.
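// For context: these two pseudos carry the TOC-relative addressing used by
// the medium and large code models on 64-bit ELF. A sketch of the expected
// expansion (register numbers illustrative only; the actual registers are
// chosen by the register allocator):
//
//   addis 3, 2, sym@toc@ha    # ADDIStocHA8: high-adjusted TOC offset off r2
//   addi  3, 3, sym@toc@l     # ADDItocL:    low 16 bits of the TOC offset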
let hasSideEffects = 0 in { let isReMaterializable = 1 in { -def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDIStocHA", []>, isPPC64; +def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDIStocHA8", []>, isPPC64; def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", []>, isPPC64; } diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 8176c5120a83..fd3fc2af2327 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -215,21 +215,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N)); + return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 1, *CurDAG), SDLoc(N)); }]>; def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); }], VSPLTB_get_imm>; def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N)); + return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 2, *CurDAG), SDLoc(N)); }]>; def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); }], VSPLTH_get_imm>; def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N)); + return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 4, *CurDAG), SDLoc(N)); }]>; def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ @@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX), !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP, - [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>; + [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in { def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", IIC_LdStStore, - [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, - [(int_ppc_altivec_mtvscr v4i32:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. 
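// On the VSPLT*_get_imm transforms above: getSplatIdxForPPCMnemonics converts
// the splat mask element into the left-counted index the mnemonics expect.
// A sketch of the mapping such a helper would implement (variable names are
// illustrative, not taken from this patch):
//
//   unsigned Elts = 16 / EltSize;      // elements per 16-byte vector register
//   unsigned Idx = MaskElt / EltSize;  // splat element in mask numbering
//   return IsLittleEndian ? Elts - 1 - Idx : Idx;  // e.g. v16i8: 15 - Idx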
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a48eb1690695..96b9c9a119c0 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -1209,20 +1209,13 @@ class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } -class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, +class XX3Form_SameOp<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { let XA = XT; let XB = XT; } -class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { - let XB = XT; - let XA = XT; -} - class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a787bdd56b9d..6b10672965c9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -90,7 +90,6 @@ enum SpillOpcodeKey { SOK_QuadBitSpill, SOK_SpillToVSR, SOK_SPESpill, - SOK_SPE4Spill, SOK_LastOpcodeSpill // This must be last on the enum. }; @@ -184,10 +183,10 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); + Register Reg = DefMO.getReg(); bool IsRegCR; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI.getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || @@ -330,11 +329,13 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case PPC::LIS8: case PPC::QVGPCI: case PPC::ADDIStocHA: + case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: + case PPC::XXLEQVOnes: case PPC::V_SET0B: case PPC::V_SET0H: case PPC::V_SET0: @@ -448,7 +449,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return &MI; } -bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { // For VSX A-Type FMA instructions, it is the first two operands that can be // commuted, however, because the non-encoded tied input operand is listed @@ -966,11 +968,11 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, getKillRegState(KillSrc); return; } else if (PPC::SPERCRegClass.contains(SrcReg) && - PPC::SPE4RCRegClass.contains(DestReg)) { + PPC::GPRCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; - } else if (PPC::SPE4RCRegClass.contains(SrcReg) && + } else if (PPC::GPRCRegClass.contains(SrcReg) && PPC::SPERCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); getKillRegState(KillSrc); @@ -1009,8 +1011,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; - else if 
(PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) - Opc = PPC::OR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else @@ -1043,8 +1043,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1083,8 +1081,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1133,8 +1129,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1173,8 +1167,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1648,7 +1640,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, return false; int OpC = CmpInstr.getOpcode(); - unsigned CRReg = CmpInstr.getOperand(0).getReg(); + Register CRReg = CmpInstr.getOperand(0).getReg(); // FP record forms set CR1 based on the exception status bits, not a // comparison with zero. @@ -1671,7 +1663,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Look through copies unless that gets us to a physical register. unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); - if (TargetRegisterInfo::isVirtualRegister(ActualSrc)) + if (Register::isVirtualRegister(ActualSrc)) SrcReg = ActualSrc; // Get the unique definition of SrcReg. @@ -1937,7 +1929,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Rotates are expensive instructions. If we're emitting a record-form // rotate that can just be an andi/andis, we should just emit that. 
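// A concrete instance of the rewrite described above (encodings per the ISA,
// register numbers illustrative): a record-form rotate with a zero shift and
// a contiguous mask ending at bit 31 is just an AND with a 16-bit immediate:
//
//   rlwinm. 3, 4, 0, 16, 31   ==>   andi. 3, 4, 0xFFFF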
if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { - unsigned GPRRes = MI->getOperand(0).getReg(); + Register GPRRes = MI->getOperand(0).getReg(); int64_t SH = MI->getOperand(2).getImm(); int64_t MB = MI->getOperand(3).getImm(); int64_t ME = MI->getOperand(4).getImm(); @@ -2122,7 +2114,7 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { llvm_unreachable("Unknown Operation!"); } - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) @@ -2184,7 +2176,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) { MI.setDesc(get(PPC::DFLOADf64)); return expandPostRAPseudo(MI); @@ -2194,7 +2186,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_ST: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::DFSTOREf64)); @@ -2206,7 +2198,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_LDX: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) MI.setDesc(get(PPC::LXSDX)); else @@ -2214,7 +2206,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_STX: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::STXSDX)); @@ -2279,10 +2271,10 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, int64_t Imm) const { assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG"); // Replace the REG with the Immediate. 
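// (On the unsigned -> Register changes below: Register is LLVM's wrapper type
// for register numbers, and the virtual/physical query moves with it. An
// illustrative use, mirroring the replacements made throughout this patch:
//   Register R = MI.getOperand(OpNo).getReg();
//   if (Register::isVirtualRegister(R))   // was TargetRegisterInfo::...
//     ... )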
-  unsigned InUseReg = MI.getOperand(OpNo).getReg();
+  Register InUseReg = MI.getOperand(OpNo).getReg();
   MI.getOperand(OpNo).ChangeToImmediate(Imm);
 
-  if (empty(MI.implicit_operands()))
+  if (MI.implicit_operands().empty())
     return;
 
   // We need to make sure that the MI didn't have any implicit use
@@ -2328,6 +2320,23 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
       .addImm(LII.Imm);
 }
 
+MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
+                                           bool &SeenIntermediateUse) const {
+  assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
+         "Should be called after register allocation.");
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
+  It++;
+  SeenIntermediateUse = false;
+  for (; It != E; ++It) {
+    if (It->modifiesRegister(Reg, TRI))
+      return &*It;
+    if (It->readsRegister(Reg, TRI))
+      SeenIntermediateUse = true;
+  }
+  return nullptr;
+}
+
 MachineInstr *PPCInstrInfo::getForwardingDefMI(
   MachineInstr &MI,
   unsigned &OpNoForForwarding,
@@ -2342,11 +2351,11 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
     for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
       if (!MI.getOperand(i).isReg())
         continue;
-      unsigned Reg = MI.getOperand(i).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      Register Reg = MI.getOperand(i).getReg();
+      if (!Register::isVirtualRegister(Reg))
         continue;
       unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
-      if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
+      if (Register::isVirtualRegister(TrueReg)) {
         DefMI = MRI->getVRegDef(TrueReg);
         if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
           OpNoForForwarding = i;
@@ -2370,7 +2379,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
                                 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
                                 Opc == PPC::RLWINM || Opc == PPC::RLWINMo ||
                                 Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
-    if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm)
+    bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
+                       ? isVFRegister(MI.getOperand(0).getReg())
+                       : false;
+    if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
       return nullptr;
 
     // Don't convert or %X, %Y, %Y since that's just a register move.
@@ -2381,29 +2393,24 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
       MachineOperand &MO = MI.getOperand(i);
       SeenIntermediateUse = false;
       if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
-        MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
-        It++;
-        unsigned Reg = MI.getOperand(i).getReg();
-
-        // Is this register defined by some form of add-immediate (including
-        // load-immediate) within this basic block?
-        for ( ; It != E; ++It) {
-          if (It->modifiesRegister(Reg, &getRegisterInfo())) {
-            switch (It->getOpcode()) {
-            default: break;
-            case PPC::LI:
-            case PPC::LI8:
-            case PPC::ADDItocL:
-            case PPC::ADDI:
-            case PPC::ADDI8:
-              OpNoForForwarding = i;
-              return &*It;
-            }
+        Register Reg = MI.getOperand(i).getReg();
+        // If we see another use of this reg between the def and the MI,
+        // we want to flag it so the def isn't deleted.
+        MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
+        if (DefMI) {
+          // Is this register defined by some form of add-immediate (including
+          // load-immediate) within this basic block?
+          switch (DefMI->getOpcode()) {
+          default:
            break;
-          } else if (It->readsRegister(Reg, &getRegisterInfo()))
-            // If we see another use of this reg between the def and the MI,
-            // we want to flag it so the def isn't deleted.
- SeenIntermediateUse = true; + case PPC::LI: + case PPC::LI8: + case PPC::ADDItocL: + case PPC::ADDI: + case PPC::ADDI8: + OpNoForForwarding = i; + return DefMI; + } } } } @@ -2417,7 +2424,7 @@ const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, - PPC::SPILLTOVSR_ST, PPC::EVSTDD, PPC::SPESTW}, + PPC::SPILLTOVSR_ST, PPC::EVSTDD}, // Power 9 {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, @@ -2433,7 +2440,7 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, - PPC::SPILLTOVSR_LD, PPC::EVLDD, PPC::SPELWZ}, + PPC::SPILLTOVSR_LD, PPC::EVLDD}, // Power 9 {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32, @@ -2538,12 +2545,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, "The forwarding operand needs to be valid at this point"); bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; - unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); + Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; ImmInstrInfo III; - bool HasImmForm = instrHasImmForm(MI, III, PostRA); + bool IsVFReg = MI.getOperand(0).isReg() + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA); // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. @@ -2591,7 +2601,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // If a compare-immediate is fed by an immediate and is itself an input of // an ISEL (the most common case) into a COPY of the correct register. bool Changed = false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); int64_t Comparand = MI.getOperand(2).getImm(); int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? 
(Comparand | 0xFFFFFFFFFFFF0000) : Comparand; @@ -2601,8 +2611,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) continue; unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); - unsigned TrueReg = CompareUseMI.getOperand(1).getReg(); - unsigned FalseReg = CompareUseMI.getOperand(2).getReg(); + Register TrueReg = CompareUseMI.getOperand(1).getReg(); + Register FalseReg = CompareUseMI.getOperand(2).getReg(); unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); if (RegToCopy == PPC::NoRegister) @@ -2777,9 +2787,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, +bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const { - unsigned Opc = MI.getOpcode(); // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. A marked exception // are the update form loads/stores for which a constant operand 2 would need @@ -3111,7 +3120,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSSP; else { III.ImmOpcode = PPC::LFS; @@ -3125,7 +3134,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSD; else { III.ImmOpcode = PPC::LFD; @@ -3143,7 +3152,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSSP; else { III.ImmOpcode = PPC::STFS; @@ -3157,7 +3166,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSD; else { III.ImmOpcode = PPC::STFD; @@ -3287,7 +3296,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding( if (MRI.isSSA()) return false; - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. MachineBasicBlock::const_reverse_iterator It = MI; @@ -3511,8 +3520,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig : III.ZeroIsSpecialNew + 1; - unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); - unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); + Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); // If R0 is in the operand where zero is special for the new instruction, // it is unsafe to transform if the constant operand isn't that operand. if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && @@ -3563,16 +3572,20 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, } else { // The 32 bit and 64 bit instructions are quite different. if (SpecialShift32) { - // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31). - uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; + // Left shifts use (N, 0, 31-N). + // Right shifts use (32-N, N, 31) if 0 < N < 32. + // use (0, 0, 31) if N == 0. 
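// A worked example of the rule above (illustrative only):
//   srwi 3, 4, 3  ==  rlwinm 3, 4, 29, 3, 31   (SH = 32 - 3)
// whereas N == 0 must encode SH = 0, because SH is a 5-bit field and the
// value 32 is not representable.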
+ uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { - // Left shifts use (N, 63-N), right shifts use (64-N, N). - uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; + // Left shifts use (N, 63-N). + // Right shifts use (64-N, N) if 0 < N < 64. + // use (0, 0) if N == 0. + uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); @@ -3601,8 +3614,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (III.ZeroIsSpecialNew) { // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no // need to fix up register class. - unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegToModify)) { + Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + if (Register::isVirtualRegister(RegToModify)) { const TargetRegisterClass *NewRC = MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; @@ -3747,7 +3760,7 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { return false; unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); unsigned StackOffset = MI.getOperand(1).getImm(); - unsigned StackReg = MI.getOperand(2).getReg(); + Register StackReg = MI.getOperand(2).getReg(); if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset) return true; @@ -3772,7 +3785,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, switch (MI.getOpcode()) { case PPC::COPY: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // In both ELFv1 and v2 ABI, method parameters and the return value // are sign- or zero-extended. @@ -3781,7 +3794,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, // We check the ZExt/SExt flags for a method parameter. if (MI.getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock()) { - unsigned VReg = MI.getOperand(0).getReg(); + Register VReg = MI.getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? FuncInfo->isLiveInSExt(VReg) : FuncInfo->isLiveInZExt(VReg); @@ -3818,7 +3831,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, } // If this is a copy from another register, we recursively check source. - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) @@ -3841,8 +3854,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, case PPC::XORIS8: { // logical operation with 16-bit immediate does not change the upper bits. // So, we track the operand register as we do for register copy. 
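// A worked instance of that rule (hypothetical values): if %src is known
// zero-extended from 32 bits, then
//   %dst = XORI8 %src, 0xFFFF
// flips only bits 15:0, so bits 63:32 stay zero and %dst remains
// zero-extended as well.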
-    unsigned SrcReg = MI.getOperand(1).getReg();
-    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    Register SrcReg = MI.getOperand(1).getReg();
+    if (!Register::isVirtualRegister(SrcReg))
       return false;
     const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
     if (SrcMI != NULL)
@@ -3870,8 +3883,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
 
     for (unsigned I = 1; I != E; I += D) {
       if (MI.getOperand(I).isReg()) {
-        unsigned SrcReg = MI.getOperand(I).getReg();
-        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+        Register SrcReg = MI.getOperand(I).getReg();
+        if (!Register::isVirtualRegister(SrcReg))
           return false;
         const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
         if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
@@ -3893,12 +3906,12 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
 
     assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg());
 
-    unsigned SrcReg1 = MI.getOperand(1).getReg();
-    unsigned SrcReg2 = MI.getOperand(2).getReg();
+    Register SrcReg1 = MI.getOperand(1).getReg();
+    Register SrcReg2 = MI.getOperand(2).getReg();
 
-    if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) ||
-        !TargetRegisterInfo::isVirtualRegister(SrcReg2))
-      return false;
+    if (!Register::isVirtualRegister(SrcReg1) ||
+        !Register::isVirtualRegister(SrcReg2))
+      return false;
 
     const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1);
     const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2);
@@ -3923,21 +3936,99 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
   return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
 }
 
-bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
-                               MachineInstr *&CmpInst) const {
-  MachineBasicBlock *LoopEnd = L.getBottomBlock();
-  MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
-  // We really "analyze" only CTR loops right now.
-  if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
-    IndVarInst = nullptr;
-    CmpInst = &*I;
-    return false;
+namespace {
+class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+  MachineInstr *Loop, *EndLoop, *LoopCount;
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  int64_t TripCount;
+
+public:
+  PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
+                       MachineInstr *LoopCount)
+      : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
+        MF(Loop->getParent()->getParent()),
+        TII(MF->getSubtarget().getInstrInfo()) {
+    // Inspect the Loop instruction up-front, as it may be deleted when we call
+    // createTripCountGreaterCondition.
+    if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
+      TripCount = LoopCount->getOperand(1).getImm();
+    else
+      TripCount = -1;
   }
-  return true;
+
+  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+    // Only ignore the terminator.
+    return MI == EndLoop;
+  }
+
+  Optional<bool>
+  createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
+                                  SmallVectorImpl<MachineOperand> &Cond) override {
+    if (TripCount == -1) {
+      // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by
+      // 1, we don't need to generate anything here.
+      Cond.push_back(MachineOperand::CreateImm(0));
+      Cond.push_back(MachineOperand::CreateReg(
+          MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
+          true));
+      return {};
+    }
+
+    return TripCount > TC;
+  }
+
+  void setPreheader(MachineBasicBlock *NewPreheader) override {
+    // Do nothing. We want the LOOP setup instruction to stay in the *old*
+    // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
+  }
+
+  void adjustTripCount(int TripCountAdjust) override {
+    // If the loop trip count is a compile-time value, then just change the
+    // value.
+    if (LoopCount->getOpcode() == PPC::LI8 ||
+        LoopCount->getOpcode() == PPC::LI) {
+      int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
+      LoopCount->getOperand(1).setImm(TripCount);
+      return;
+    }
+
+    // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by 1,
+    // we don't need to generate anything here.
+  }
+
+  void disposed() override {
+    Loop->eraseFromParent();
+    // Ensure the loop setup instruction is deleted too.
+    LoopCount->eraseFromParent();
+  }
+};
+} // namespace
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+  // We really "analyze" only hardware loops right now.
+  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+  if (Preheader == LoopBB)
+    Preheader = *std::next(LoopBB->pred_begin());
+  MachineFunction *MF = Preheader->getParent();
+
+  if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
+    SmallPtrSet<MachineBasicBlock *, 8> Visited;
+    if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
+      Register LoopCountReg = LoopInst->getOperand(0).getReg();
+      MachineRegisterInfo &MRI = MF->getRegInfo();
+      MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+      return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
+    }
+  }
+  return nullptr;
 }
 
-MachineInstr *
-PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+MachineInstr *PPCInstrInfo::findLoopInstr(
+    MachineBasicBlock &PreHeader,
+    SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
 
   unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
 
@@ -3948,50 +4039,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
   return nullptr;
 }
 
-unsigned PPCInstrInfo::reduceLoopCount(
-    MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
-    MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
-    SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
-    unsigned MaxIter) const {
-  // We expect a hardware loop currently. This means that IndVar is set
-  // to null, and the compare is the ENDLOOP instruction.
-  assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
-  MachineFunction *MF = MBB.getParent();
-  DebugLoc DL = Cmp.getDebugLoc();
-  MachineInstr *Loop = findLoopInstr(PreHeader);
-  if (!Loop)
-    return 0;
-  unsigned LoopCountReg = Loop->getOperand(0).getReg();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
-
-  if (!LoopCount)
-    return 0;
-  // If the loop trip count is a compile-time value, then just change the
-  // value.
-  if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
-    int64_t Offset = LoopCount->getOperand(1).getImm();
-    if (Offset <= 1) {
-      LoopCount->eraseFromParent();
-      Loop->eraseFromParent();
-      return 0;
-    }
-    LoopCount->getOperand(1).setImm(Offset - 1);
-    return Offset - 1;
-  }
-
-  // The loop trip count is a run-time value.
-  // We need to subtract one from the trip count,
-  // and insert branch later to check if we're done with the loop.
- - // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, - // so we don't need to generate any thing here. - Cond.push_back(MachineOperand::CreateImm(0)); - Cond.push_back(MachineOperand::CreateReg( - Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true)); - return LoopCountReg; -} - // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. bool PPCInstrInfo::getMemOperandWithOffsetWidth( @@ -4018,8 +4065,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( } bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis * /*AA*/) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 70fb757e8f1e..19ab30cb0908 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -248,11 +248,11 @@ public: unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; void insertNoop(MachineBasicBlock &MBB, @@ -370,8 +370,7 @@ public: /// otherwise bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -439,9 +438,14 @@ public: void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const; - bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, + bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const; + // In PostRA phase, try to find instruction defines \p Reg before \p MI. + // \p SeenIntermediate is set to true if uses between DefMI and \p MI exist. + MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, + bool &SeenIntermediateUse) const; + /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have @@ -481,26 +485,14 @@ public: /// On PPC, we have two instructions used to set-up the hardware loop /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) /// instructions to indicate the end of a loop. - MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const; - - /// Analyze the loop code to find the loop induction variable and compare used - /// to compute the number of iterations. Currently, we analyze loop that are - /// controlled using hardware loops. In this case, the induction variable - /// instruction is null. For all other cases, this function returns true, - /// which means we're unable to analyze it. 
\p IndVarInst and \p CmpInst will - /// return new values when we can analyze the readonly loop \p L, otherwise, - /// nothing got changed - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the last iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl<MachineOperand> &Cond, - SmallVectorImpl<MachineInstr *> &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + MachineInstr * + findLoopInstr(MachineBasicBlock &PreHeader, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) const; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index c313337047f0..24183277519b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -386,7 +386,9 @@ def immZExt16 : PatLeaf<(imm), [{ // field. Used by instructions like 'ori'. return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); }], LO16>; -def immAnyExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm) || isUInt<8>(Imm); }]>; +def immNonAllOneAnyExt8 : ImmLeaf<i32, [{ + return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF)); +}]>; def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>; // imm16Shifted* - These match immediates where the low 16-bits are zero. There @@ -577,7 +579,7 @@ def sperc : RegisterOperand<SPERC> { def PPCRegSPE4RCAsmOperand : AsmOperandClass { let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; } -def spe4rc : RegisterOperand<SPE4RC> { +def spe4rc : RegisterOperand<GPRC> { let ParserMatchClass = PPCRegSPE4RCAsmOperand; } @@ -3161,7 +3163,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1 def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg), + "#LWZtocL", + [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp), + "#ADDIStocHA", + [(set i32:$rD, + (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>; + // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. 
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; @@ -3177,21 +3188,21 @@ def : Pat<(srl i32:$rS, i32:$rB), def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; -def : Pat<(zextloadi1 iaddr:$src), +def : Pat<(i32 (zextloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(zextloadi1 xaddr:$src), +def : Pat<(i32 (zextloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi1 iaddr:$src), +def : Pat<(i32 (extloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), +def : Pat<(i32 (extloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), +def : Pat<(i32 (extloadi8 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), +def : Pat<(i32 (extloadi8 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), +def : Pat<(i32 (extloadi16 iaddr:$src)), (LHZ iaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), +def : Pat<(i32 (extloadi16 xaddr:$src)), (LHZX xaddr:$src)>; let Predicates = [HasFPU] in { def : Pat<(f64 (extloadf32 iaddr:$src)), @@ -3564,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; - -defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; - def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), @@ -3592,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; - // SETCC for i64. 
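// (Each CRNotPat above expands to two selection patterns: the predicate
// itself via a crnot of the named CR field, and its negation via the field
// directly; e.g. SETUGE is the complement of SETULT, so both share sub_lt.
// A sketch of the multiclass, assumed from its earlier definition in this
// file rather than taken from this patch:
//   multiclass CRNotPat<dag pattern, dag result> {
//     def : Pat<pattern, (crnot result)>;
//     def : Pat<(not pattern), result>;
//   })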
def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; @@ -3632,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i32. +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i64. defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), @@ -3649,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), @@ -3671,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; +let Predicates = [HasFPU] in { +// Instantiations of CRNotPat for f32. 
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f64. +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f128. +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; +} + // SETCC for f32. let Predicates = [HasFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), @@ -3688,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; - // SETCC for f64. 
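// (On the FCMPU-based mappings grouped here: each unordered predicate is the
// complement of an ordered one once NaN sets the UN bit, e.g.
// SETUGE == !SETOLT, hence crnot of sub_lt, and SETO == !SETUO, hence crnot
// of sub_un. That is why the moved CRNotPat forms reuse the same CR fields
// as the plain patterns.)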
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; @@ -3719,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; - // SETCC for f128. def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; @@ -3750,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; - } // This must be in this file because it relies on patterns defined in this file diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 07f38a61d098..2aad5860d87f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -58,8 +58,12 @@ def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ SDTCisVT<0, v4f32>, SDTCisPtrTy<1> ]>; -def SDT_PPCfpextlh : SDTypeProfile<1, 1, [ - SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32> +def SDT_PPCfpexth : SDTypeProfile<1, 2, [ + SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2> +]>; + +def SDT_PPCldsplat : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> ]>; // Little-endian-specific nodes. 
@@ -78,12 +82,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [ def SDTVabsd : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> ]>; - +def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; +def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; +def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, + [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; @@ -93,9 +106,11 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; -def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>; +def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>; def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -855,14 +870,14 @@ let Uses = [RM] in { let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), + def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; - def XXLXORdpz : XX3Form_SetZero<60, 154, + def XXLXORdpz : XX3Form_SameOp<60, 154, (outs vsfrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f64:$XT, (fpimm0))]>; - def XXLXORspz : XX3Form_SetZero<60, 154, + def XXLXORspz : XX3Form_SameOp<60, 154, (outs vssrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f32:$XT, (fpimm0))]>; @@ -996,21 +1011,21 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } -// Additional fnmsub patterns: -a*c + b == -(a*c - b) -def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), - (XSNMSUBADP $B, $C, $A)>; -def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), - (XSNMSUBADP $B, $C, $A)>; +// Additional fnmsub patterns: -a*b + c == -(a*b - c) +def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C), + (XSNMSUBADP $C, $A, $B)>; +def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C), + (XSNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; -def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; -def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, 
VSRC)>; @@ -1077,7 +1092,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; -def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>; // Loads. let Predicates = [HasVSX, HasOnlySwappingMemOps] in { @@ -1088,6 +1104,19 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } + +// Load vector big endian order +let Predicates = [IsLittleEndian, HasVSX] in { + def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +} + let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -1288,6 +1317,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; + let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins), + "xxleqv $XT, $XT, $XT", IIC_VecGeneral, + [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>; + } + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1476,6 +1512,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
AltVSXFMARel; } + // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C), + (XSNMSUBASP $C, $A, $B)>; + def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C), + (XSNMSUBASP $C, $A, $B)>; + // Single Precision Conversions (FP <-> INT) def XSCVSXDSP : XX2Form<60, 312, (outs vssrc:$XT), (ins vsfrc:$XB), @@ -1564,16 +1606,33 @@ let Predicates = [HasDirectMove] in { def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), "mfvsrwz $rA, $XT", IIC_VecGeneral, [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + let isCodeGenOnly = 1 in + def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + []>; def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), "mtvsrd $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i64:$rA))]>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwa $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + []>; def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + []>; } // HasDirectMove let Predicates = [IsISA3_0, HasDirectMove] in { @@ -1597,6 +1656,22 @@ def : InstAlias<"mfvrd $rA, $XT", (MFVRD g8rc:$rA, vrrc:$XT), 0>; def : InstAlias<"mffprd $rA, $src", (MFVSRD g8rc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrd $XT, $rA", + (MTVRD vrrc:$XT, g8rc:$rA), 0>; +def : InstAlias<"mtfprd $dst, $rA", + (MTVSRD f8rc:$dst, g8rc:$rA)>; +def : InstAlias<"mfvrwz $rA, $XT", + (MFVRWZ gprc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprwz $rA, $src", + (MFVSRWZ gprc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrwa $XT, $rA", + (MTVRWA vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwa $dst, $rA", + (MTVSRWA f8rc:$dst, gprc:$rA)>; +def : InstAlias<"mtvrwz $XT, $rA", + (MTVRWZ vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwz $dst, $rA", + (MTVSRWZ f8rc:$dst, gprc:$rA)>; /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). 
Namely, entities smaller than @@ -2581,9 +2656,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; - // Additional fnmsub patterns: -a*c + b == -(a*c - b) - def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; - def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; + // Additional fnmsub patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>; + def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>; //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: @@ -2799,12 +2874,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, [(set v4i32: $XT, - (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, - (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; //===--------------------------------------------------------------------===// @@ -3024,6 +3099,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + + def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; + + def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { @@ -3059,7 +3144,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; - } // IsLittleEndian, HasP9Vector + } // IsBigEndian, HasP9Vector // D-Form Load/Store def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; @@ -3858,6 +3943,10 @@ let AddedComplexity = 400 in { (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), + (v2f64 (LXVDSX xoaddr:$A))>; + def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), + (v2i64 (LXVDSX xoaddr:$A))>; // Build vectors of floating point converted to i64. 
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -4063,27 +4152,32 @@ let AddedComplexity = 400 in { (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } + let Predicates = [HasP8Vector] in { + def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))), + (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))), + (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))), + (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), + (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + } + let Predicates = [HasP9Vector] in { // Endianness-neutral patterns for const splats with ISA 3.0 instructions. def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (v4i32 (MTVSRWS $A))>; - def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A)), + def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; - def : Pat<(v16i8 immAllOnesV), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v8i16 immAllOnesV), - (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v4i32 immAllOnesV), - (v4i32 (XXSPLTIB 255))>; - def : Pat<(v2i64 immAllOnesV), - (v2i64 (XXSPLTIB 255))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), @@ -4102,6 +4196,10 @@ let AddedComplexity = 400 in { (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; + def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), + (v4f32 (LXVWSX xoaddr:$A))>; + def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), + (v4i32 (LXVWSX xoaddr:$A))>; } let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 4d45d96d4479..d252cfbd26b1 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -63,8 +63,24 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars", cl::desc("Potential PHI threshold for PPC preinc loop prep")); STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); +STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); namespace { + struct BucketElement { + BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} + BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} + + const SCEVConstant *Offset; + Instruction *Instr; + }; + + struct Bucket { + Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), + Elements(1, BucketElement(I)) {} + + const SCEV *BaseSCEV; + SmallVector<BucketElement, 16> Elements; + }; class PPCLoopPreIncPrep : public FunctionPass { public: @@ -85,21 +101,47 @@ namespace { AU.addRequired<ScalarEvolutionWrapperPass>(); } - bool 
alreadyPrepared(Loop *L, Instruction* MemI, - const SCEV *BasePtrStartSCEV, - const SCEVConstant *BasePtrIncSCEV); bool runOnFunction(Function &F) override; - bool runOnLoop(Loop *L); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); - private: PPCTargetMachine *TM = nullptr; + const PPCSubtarget *ST; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; bool PreserveLCSSA; + + bool runOnLoop(Loop *L); + + /// Check if the required PHI node already exists in Loop \p L. + bool alreadyPrepared(Loop *L, Instruction* MemI, + const SCEV *BasePtrStartSCEV, + const SCEVConstant *BasePtrIncSCEV); + + /// Collect candidates in Loop \p L for which \p isValidCandidate() + /// returns true. + SmallVector<Bucket, 16> + collectCandidates(Loop *L, + std::function<bool(const Instruction *, const Value *)> + isValidCandidate, + unsigned MaxCandidateNum); + + /// Add a candidate to candidates \p Buckets. + void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector<Bucket, 16> &Buckets, + unsigned MaxCandidateNum); + + /// Prepare all candidates in \p Buckets for update form. + bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets); + + /// Prepare one chain \p BucketChain: find the best base element and + /// update all other elements in \p BucketChain accordingly. + bool prepareBaseForUpdateFormChain(Bucket &BucketChain); + + /// Rewrite load/store instructions in \p BucketChain according to + /// preparation. + bool rewriteLoadStores(Loop *L, Bucket &BucketChain, + SmallSet<BasicBlock *, 16> &BBChanged); }; } // end anonymous namespace @@ -111,30 +153,15 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +static const std::string PHINodeNameSuffix = ".phi"; +static const std::string CastNodeNameSuffix = ".cast"; +static const std::string GEPNodeIncNameSuffix = ".inc"; +static const std::string GEPNodeOffNameSuffix = ".off"; + FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { return new PPCLoopPreIncPrep(TM); } -namespace { - - struct BucketElement { - BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} - BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - - const SCEVConstant *Offset; - Instruction *Instr; - }; - - struct Bucket { - Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), - Elements(1, BucketElement(I)) {} - - const SCEV *BaseSCEV; - SmallVector<BucketElement, 16> Elements; - }; - -} // end anonymous namespace - static bool IsPtrInBounds(Value *BasePtr) { Value *StrippedBasePtr = BasePtr; while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) @@ -145,6 +172,14 @@ static bool IsPtrInBounds(Value *BasePtr) { return false; } +static std::string getInstrName(const Value *I, const std::string Suffix) { + assert(I && "Invalid parameter!"); + if (I->hasName()) + return (I->getName() + Suffix).str(); + else + return ""; +} + static Value *GetPointerOperand(Value *MemI) { if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { return LMemI->getPointerOperand(); @@ -167,6 +202,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + ST = TM ?
TM->getSubtargetImpl(F) : nullptr; bool MadeChange = false; @@ -177,10 +213,280 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { return MadeChange; } +void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector<Bucket, 16> &Buckets, + unsigned MaxCandidateNum) { + assert((MemI && GetPointerOperand(MemI)) && + "Candidate should be a memory instruction."); + assert(LSCEV && "Invalid SCEV for Ptr value."); + bool FoundBucket = false; + for (auto &B : Buckets) { + const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); + if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { + B.Elements.push_back(BucketElement(CDiff, MemI)); + FoundBucket = true; + break; + } + } + + if (!FoundBucket) { + if (Buckets.size() == MaxCandidateNum) + return; + Buckets.push_back(Bucket(LSCEV, MemI)); + } +} + +SmallVector<Bucket, 16> PPCLoopPreIncPrep::collectCandidates( + Loop *L, + std::function<bool(const Instruction *, const Value *)> isValidCandidate, + unsigned MaxCandidateNum) { + SmallVector<Bucket, 16> Buckets; + for (const auto &BB : L->blocks()) + for (auto &J : *BB) { + Value *PtrValue; + Instruction *MemI; + + if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) { + MemI = LMemI; + PtrValue = LMemI->getPointerOperand(); + } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) { + MemI = SMemI; + PtrValue = SMemI->getPointerOperand(); + } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) { + if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { + MemI = IMemI; + PtrValue = IMemI->getArgOperand(0); + } else continue; + } else continue; + + unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); + if (PtrAddrSpace) + continue; + + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + continue; + + if (isValidCandidate(&J, PtrValue)) + addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum); + } + return Buckets; +} + +// TODO: implement a more clever base choosing policy. +// Currently we always choose an existing load/store offset. This may lead to +// suboptimal code sequences. For example, for one DS chain with offsets +// {-32769, 2003, 2007, 2011}, we choose -32769 as the base offset, and the +// remaining displacements for the loads/stores are {0, 34772, 34776, 34780}. +// Though each offset is now a multiple of 4, the nonzero ones cannot be +// represented in a signed 16-bit field. +bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { + // We have a choice now of which instruction's memory operand we use as the + // base for the generated PHI. Always picking the first instruction in each + // bucket does not work well, specifically because that instruction might + // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, + // the choice is somewhat arbitrary, because the backend will happily + // generate direct offsets from both the pre-incremented and + // post-incremented pointer values. Thus, we'll pick the first non-prefetch + // instruction in each bucket, and adjust the recurrence and other offsets + // accordingly. + for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { + if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr)) + if (II->getIntrinsicID() == Intrinsic::prefetch) + continue; + + // If we'd otherwise pick the first element anyway, there's nothing to do.
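To put numbers on the TODO above: rebasing the chain on the existing offset -32769 leaves displacements that are multiples of 4 but overflow the signed 16-bit DS-form field. A quick sketch of that arithmetic (fitsDSForm is a hypothetical helper for illustration, not something the pass defines):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Does a displacement satisfy the DS-form constraints, i.e. fit in a
// signed 16-bit field and be a multiple of 4?
static bool fitsDSForm(int64_t D) {
  return D >= -32768 && D <= 32767 && D % 4 == 0;
}

int main() {
  // The chain from the TODO above.
  std::vector<int64_t> Offsets = {-32769, 2003, 2007, 2011};
  int64_t Base = Offsets[0]; // current policy: pick an existing offset
  for (int64_t O : Offsets) {
    int64_t Disp = O - Base; // displacement left on each access
    std::printf("offset %6lld -> disp %6lld fitsDSForm=%d\n",
                (long long)O, (long long)Disp, fitsDSForm(Disp));
  }
  return 0;
}
```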
+ if (j == 0) + break; + + // If our chosen element has no offset from the base pointer, there's + // nothing to do. + if (!BucketChain.Elements[j].Offset || + BucketChain.Elements[j].Offset->isZero()) + break; + + const SCEV *Offset = BucketChain.Elements[j].Offset; + BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); + for (auto &E : BucketChain.Elements) { + if (E.Offset) + E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); + else + E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); + } + + std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); + break; + } + return true; +} + +bool PPCLoopPreIncPrep::rewriteLoadStores( + Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged) { + bool MadeChange = false; + const SCEVAddRecExpr *BasePtrSCEV = + cast<SCEVAddRecExpr>(BucketChain.BaseSCEV); + if (!BasePtrSCEV->isAffine()) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + + assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); + + // The instruction corresponding to the Bucket's BaseSCEV must be the first + // in the vector of elements. + Instruction *MemI = BucketChain.Elements.begin()->Instr; + Value *BasePtr = GetPointerOperand(MemI); + assert(BasePtr && "No pointer operand"); + + Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); + Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), + BasePtr->getType()->getPointerAddressSpace()); + + const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); + if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) + return MadeChange; + + const SCEVConstant *BasePtrIncSCEV = + dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); + if (!BasePtrIncSCEV) + return MadeChange; + BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); + if (!isSafeToExpand(BasePtrStartSCEV, *SE)) + return MadeChange; + + if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + + BasicBlock *Header = L->getHeader(); + unsigned HeaderLoopPredCount = pred_size(Header); + BasicBlock *LoopPredecessor = L->getLoopPredecessor(); + + PHINode *NewPHI = + PHINode::Create(I8PtrTy, HeaderLoopPredCount, + getInstrName(MemI, PHINodeNameSuffix), + Header->getFirstNonPHI()); + + SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); + Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, + LoopPredecessor->getTerminator()); + + // Note that LoopPredecessor might occur in the predecessor list multiple + // times, and we need to add it the right number of times. 
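What rewriteLoadStores is arranging, in effect: the pointer PHI starts one stride before the first access (the start SCEV minus the increment), a single GEP advances it each iteration, and every access becomes a small fixed displacement from the advanced pointer — the shape the backend can select update-form (pre-increment) loads and stores such as LDU/STDU for. Roughly, at the source level (a sketch only; see the caveat in the comment):

```cpp
// Conceptual shape only -- the pass performs this rewrite on LLVM IR, where
// the "one stride before the first element" start value is produced by SCEV
// expansion in the preheader. In C++ the same pointer arithmetic would be
// out of bounds before the first increment, so treat this purely as a sketch.
long sum(const long *A, long N) {
  long S = 0;
  const long *P = A - 1;  // start = first address minus one stride
  for (long i = 0; i < N; ++i) {
    P += 1;               // single recurrence: maps to the "update" part
    S += *P;              // zero-displacement access off the new base (LDU)
  }
  return S;
}
```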
+ for (const auto &PI : predecessors(Header)) { + if (PI != LoopPredecessor) + continue; + + NewPHI->addIncoming(BasePtrStart, LoopPredecessor); + } + + Instruction *InsPoint = &*Header->getFirstInsertionPt(); + GetElementPtrInst *PtrInc = GetElementPtrInst::Create( + I8Ty, NewPHI, BasePtrIncSCEV->getValue(), + getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); + PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); + for (const auto &PI : predecessors(Header)) { + if (PI == LoopPredecessor) + continue; + + NewPHI->addIncoming(PtrInc, PI); + } + + Instruction *NewBasePtr; + if (PtrInc->getType() != BasePtr->getType()) + NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), + getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); + else + NewBasePtr = PtrInc; + + if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) + BBChanged.insert(IDel->getParent()); + BasePtr->replaceAllUsesWith(NewBasePtr); + RecursivelyDeleteTriviallyDeadInstructions(BasePtr); + + // Keep track of the replacement pointer values we've inserted so that we + // don't generate more pointer values than necessary. + SmallPtrSet<Value *, 16> NewPtrs; + NewPtrs.insert(NewBasePtr); + + for (auto I = std::next(BucketChain.Elements.begin()), + IE = BucketChain.Elements.end(); I != IE; ++I) { + Value *Ptr = GetPointerOperand(I->Instr); + assert(Ptr && "No pointer operand"); + if (NewPtrs.count(Ptr)) + continue; + + Instruction *RealNewPtr; + if (!I->Offset || I->Offset->getValue()->isZero()) { + RealNewPtr = NewBasePtr; + } else { + Instruction *PtrIP = dyn_cast<Instruction>(Ptr); + if (PtrIP && isa<Instruction>(NewBasePtr) && + cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) + PtrIP = nullptr; + else if (PtrIP && isa<PHINode>(PtrIP)) + PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); + else if (!PtrIP) + PtrIP = I->Instr; + + GetElementPtrInst *NewPtr = GetElementPtrInst::Create( + I8Ty, PtrInc, I->Offset->getValue(), + getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); + if (!PtrIP) + NewPtr->insertAfter(cast<Instruction>(PtrInc)); + NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); + RealNewPtr = NewPtr; + } + + if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) + BBChanged.insert(IDel->getParent()); + + Instruction *ReplNewPtr; + if (Ptr->getType() != RealNewPtr->getType()) { + ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), + getInstrName(Ptr, CastNodeNameSuffix)); + ReplNewPtr->insertAfter(RealNewPtr); + } else + ReplNewPtr = RealNewPtr; + + Ptr->replaceAllUsesWith(ReplNewPtr); + RecursivelyDeleteTriviallyDeadInstructions(Ptr); + + NewPtrs.insert(RealNewPtr); + } + + MadeChange = true; + UpdFormChainRewritten++; + + return MadeChange; +} + +bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, + SmallVector<Bucket, 16> &Buckets) { + bool MadeChange = false; + if (Buckets.empty()) + return MadeChange; + SmallSet<BasicBlock *, 16> BBChanged; + for (auto &Bucket : Buckets) + // The base address of each bucket is transformed into a phi and the others + // are rewritten based on the new base. + if (prepareBaseForUpdateFormChain(Bucket)) + MadeChange |= rewriteLoadStores(L, Bucket, BBChanged); + if (MadeChange) + for (auto &BB : L->blocks()) + if (BBChanged.count(BB)) + DeleteDeadPHIs(BB); + return MadeChange; +} + // In order to prepare for the pre-increment a PHI is added. // This function will check to see if that PHI already exists and will return -// true if it found an existing PHI with the same start and increment as the -// one we wanted to create.
+// true if it found an existing PHI with the same start and increment as the +// one we wanted to create. bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, const SCEV *BasePtrStartSCEV, const SCEVConstant *BasePtrIncSCEV) { @@ -216,10 +522,10 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, continue; if (CurrentPHINode->getNumIncomingValues() == 2) { - if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB && - CurrentPHINode->getIncomingBlock(1) == PredBB) || - (CurrentPHINode->getIncomingBlock(1) == LatchBB && - CurrentPHINode->getIncomingBlock(0) == PredBB) ) { + if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && + CurrentPHINode->getIncomingBlock(1) == PredBB) || + (CurrentPHINode->getIncomingBlock(1) == LatchBB && + CurrentPHINode->getIncomingBlock(0) == PredBB)) { if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && PHIBasePtrIncSCEV == BasePtrIncSCEV) { // The existing PHI (CurrentPHINode) has the same start and increment @@ -242,89 +548,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); - BasicBlock *Header = L->getHeader(); - - const PPCSubtarget *ST = - TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - - unsigned HeaderLoopPredCount = pred_size(Header); - - // Collect buckets of comparable addresses used by loads and stores. - SmallVector<Bucket, 16> Buckets; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); - J != JE; ++J) { - Value *PtrValue; - Instruction *MemI; - - if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { - MemI = LMemI; - PtrValue = LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { - MemI = SMemI; - PtrValue = SMemI->getPointerOperand(); - } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { - MemI = IMemI; - PtrValue = IMemI->getArgOperand(0); - } else continue; - } else continue; - - unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); - if (PtrAddrSpace) - continue; - - // There are no update forms for Altivec vector load/stores. - if (ST && ST->hasAltivec() && - PtrValue->getType()->getPointerElementType()->isVectorTy()) - continue; - - if (L->isLoopInvariant(PtrValue)) - continue; - - const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); - if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { - if (LARSCEV->getLoop() != L) - continue; - // See getPreIndexedAddressParts, the displacement for LDU/STDU has to - // be 4's multiple (DS-form). For i64 loads/stores when the displacement - // fits in a 16-bit signed field but isn't a multiple of 4, it will be - // useless and possible to break some original well-form addressing mode - // to make this pre-inc prep for it. 
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { - if (const SCEVConstant *StepConst = - dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) { - const APInt &ConstInt = StepConst->getValue()->getValue(); - if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) - continue; - } - } - } else { - continue; - } - - bool FoundBucket = false; - for (auto &B : Buckets) { - const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); - if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { - B.Elements.push_back(BucketElement(CDiff, MemI)); - FoundBucket = true; - break; - } - } - - if (!FoundBucket) { - if (Buckets.size() == MaxVars) - return MadeChange; - Buckets.push_back(Bucket(LSCEV, MemI)); - } - } - } - - if (Buckets.empty()) - return MadeChange; - BasicBlock *LoopPredecessor = L->getLoopPredecessor(); // If there is no loop predecessor, or the loop predecessor's terminator // returns a value (which might contribute to determining the loop's @@ -335,191 +558,48 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (LoopPredecessor) MadeChange = true; } - if (!LoopPredecessor) + if (!LoopPredecessor) { + LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); return MadeChange; + } - LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); - - SmallSet<BasicBlock *, 16> BBChanged; - for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { - // The base address of each bucket is transformed into a phi and the others - // are rewritten as offsets of that variable. - - // We have a choice now of which instruction's memory operand we use as the - // base for the generated PHI. Always picking the first instruction in each - // bucket does not work well, specifically because that instruction might - // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, - // the choice is somewhat arbitrary, because the backend will happily - // generate direct offsets from both the pre-incremented and - // post-incremented pointer values. Thus, we'll pick the first non-prefetch - // instruction in each bucket, and adjust the recurrence and other offsets - // accordingly. - for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { - if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr)) - if (II->getIntrinsicID() == Intrinsic::prefetch) - continue; - - // If we'd otherwise pick the first element anyway, there's nothing to do. - if (j == 0) - break; - - // If our chosen element has no offset from the base pointer, there's - // nothing to do. - if (!Buckets[i].Elements[j].Offset || - Buckets[i].Elements[j].Offset->isZero()) - break; - - const SCEV *Offset = Buckets[i].Elements[j].Offset; - Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); - for (auto &E : Buckets[i].Elements) { - if (E.Offset) - E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); - else - E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); - } - - std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); - break; - } - - const SCEVAddRecExpr *BasePtrSCEV = - cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV); - if (!BasePtrSCEV->isAffine()) - continue; - - LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); - assert(BasePtrSCEV->getLoop() == L && - "AddRec for the wrong loop?"); - - // The instruction corresponding to the Bucket's BaseSCEV must be the first - // in the vector of elements. 
- Instruction *MemI = Buckets[i].Elements.begin()->Instr; - Value *BasePtr = GetPointerOperand(MemI); - assert(BasePtr && "No pointer operand"); - - Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); - Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), - BasePtr->getType()->getPointerAddressSpace()); - - const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); - if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) - continue; - - const SCEVConstant *BasePtrIncSCEV = - dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)); - if (!BasePtrIncSCEV) - continue; - BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); - if (!isSafeToExpand(BasePtrStartSCEV, *SE)) - continue; - - LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); - - if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) - continue; - - PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, - MemI->hasName() ? MemI->getName() + ".phi" : "", - Header->getFirstNonPHI()); - - SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); - Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, - LoopPredecessor->getTerminator()); - - // Note that LoopPredecessor might occur in the predecessor list multiple - // times, and we need to add it the right number of times. - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI != LoopPredecessor) - continue; - - NewPHI->addIncoming(BasePtrStart, LoopPredecessor); - } - - Instruction *InsPoint = &*Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), - MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); - PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI == LoopPredecessor) - continue; - - NewPHI->addIncoming(PtrInc, *PI); - } - - Instruction *NewBasePtr; - if (PtrInc->getType() != BasePtr->getType()) - NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), - PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); - else - NewBasePtr = PtrInc; - - if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) - BBChanged.insert(IDel->getParent()); - BasePtr->replaceAllUsesWith(NewBasePtr); - RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - - // Keep track of the replacement pointer values we've inserted so that we - // don't generate more pointer values than necessary. - SmallPtrSet<Value *, 16> NewPtrs; - NewPtrs.insert( NewBasePtr); - - for (auto I = std::next(Buckets[i].Elements.begin()), - IE = Buckets[i].Elements.end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->Instr); - assert(Ptr && "No pointer operand"); - if (NewPtrs.count(Ptr)) - continue; - - Instruction *RealNewPtr; - if (!I->Offset || I->Offset->getValue()->isZero()) { - RealNewPtr = NewBasePtr; - } else { - Instruction *PtrIP = dyn_cast<Instruction>(Ptr); - if (PtrIP && isa<Instruction>(NewBasePtr) && - cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) - PtrIP = nullptr; - else if (isa<PHINode>(PtrIP)) - PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); - else if (!PtrIP) - PtrIP = I->Instr; - - GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, I->Offset->getValue(), - I->Instr->hasName() ? 
I->Instr->getName() + ".off" : "", PtrIP); - if (!PtrIP) - NewPtr->insertAfter(cast<Instruction>(PtrInc)); - NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); - RealNewPtr = NewPtr; + // Check if a load/store has an update form. This lambda is passed to + // collectCandidates, which uses it to decide which instructions to collect. + auto isUpdateFormCandidate = [&] (const Instruction *I, + const Value *PtrValue) { + assert((PtrValue && I) && "Invalid parameter!"); + // There are no update forms for Altivec vector load/stores. + if (ST && ST->hasAltivec() && + PtrValue->getType()->getPointerElementType()->isVectorTy()) + return false; + // See getPreIndexedAddressParts: the displacement for LDU/STDU has to be + // a multiple of 4 (DS-form). For i64 loads/stores whose increment fits in + // a 16-bit signed field but isn't a multiple of 4, this pre-increment + // preparation is useless and may even break an otherwise well-formed + // addressing mode. + if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { + const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L); + const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + return false; + if (const SCEVConstant *StepConst = + dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) { + const APInt &ConstInt = StepConst->getValue()->getValue(); + if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) + return false; } - - if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) - BBChanged.insert(IDel->getParent()); - - Instruction *ReplNewPtr; - if (Ptr->getType() != RealNewPtr->getType()) { - ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), - Ptr->hasName() ? Ptr->getName() + ".cast" : ""); - ReplNewPtr->insertAfter(RealNewPtr); - } else - ReplNewPtr = RealNewPtr; - - Ptr->replaceAllUsesWith(ReplNewPtr); - RecursivelyDeleteTriviallyDeadInstructions(Ptr); - - NewPtrs.insert(RealNewPtr); } + return true; + }; - MadeChange = true; - } + // Collect buckets of comparable addresses used by loads, stores, and the + // prefetch intrinsic for update form. + SmallVector<Bucket, 16> UpdateFormBuckets = + collectCandidates(L, isUpdateFormCandidate, MaxVars); - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - if (BBChanged.count(*I)) - DeleteDeadPHIs(*I); - } + // Prepare for update form. + if (!UpdateFormBuckets.empty()) + MadeChange |= updateFormPrep(L, UpdateFormBuckets); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 027e6bd1ba06..b6496f189a3a 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer, bool isDarwin) { + AsmPrinter &Printer, bool IsDarwin) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required.
switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx); break; } @@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin) { + AsmPrinter &AP, bool IsDarwin) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MCOperand MCOp; if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, - isDarwin)) + IsDarwin)) OutMI.addOperand(MCOp); } } bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin) { + bool IsDarwin) { switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); @@ -181,17 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, return true; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin); return true; case MachineOperand::MO_JumpTableIndex: - OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_ConstantPoolIndex: - OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_BlockAddress: OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, - isDarwin); + IsDarwin); + return true; + case MachineOperand::MO_MCSymbol: + OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin); return true; case MachineOperand::MO_RegisterMask: return false; diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index 446246358e96..ac8ac060f460 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -148,8 +148,8 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, if (!Op->isReg()) return nullptr; - unsigned Reg = Op->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = Op->getReg(); + if (!Register::isVirtualRegister(Reg)) return nullptr; return MRI->getVRegDef(Reg); @@ -344,8 +344,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned TrueReg2 = TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); - if (TrueReg1 == TrueReg2 - && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { + if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) { MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; @@ -358,7 +357,7 @@ bool PPCMIPeephole::simplifyCode(void) { return false; unsigned DefReg = TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { MachineInstr *LoadMI = MRI->getVRegDef(DefReg); if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) return true; @@ -444,7 +443,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1 : 2; unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); if (!DefMI) @@ -453,8 +452,8 @@ bool PPCMIPeephole::simplifyCode(void) { auto isConvertOfSplat = [=]() -> bool { if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS) return false; - unsigned ConvReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(ConvReg)) + Register ConvReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(ConvReg)) return false; MachineInstr *Splt = MRI->getVRegDef(ConvReg); return Splt && (Splt->getOpcode() == PPC::LXVWSX || @@ -481,9 +480,9 @@ bool PPCMIPeephole::simplifyCode(void) { // Splat fed by a shift. Usually when we align value to splat into // vector element zero. if (DefOpcode == PPC::XXSLDWI) { - unsigned ShiftRes = DefMI->getOperand(0).getReg(); - unsigned ShiftOp1 = DefMI->getOperand(1).getReg(); - unsigned ShiftOp2 = DefMI->getOperand(2).getReg(); + Register ShiftRes = DefMI->getOperand(0).getReg(); + Register ShiftOp1 = DefMI->getOperand(1).getReg(); + Register ShiftOp2 = DefMI->getOperand(2).getReg(); unsigned ShiftImm = DefMI->getOperand(3).getImm(); unsigned SplatImm = MI.getOperand(2).getImm(); if (ShiftOp1 == ShiftOp2) { @@ -507,7 +506,7 @@ bool PPCMIPeephole::simplifyCode(void) { // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); @@ -518,8 +517,8 @@ bool PPCMIPeephole::simplifyCode(void) { TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); unsigned DefsReg2 = TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) || - !TargetRegisterInfo::isVirtualRegister(DefsReg2)) + if (!Register::isVirtualRegister(DefsReg1) || + !Register::isVirtualRegister(DefsReg2)) break; MachineInstr *P1 = MRI->getVRegDef(DefsReg1); MachineInstr *P2 = MRI->getVRegDef(DefsReg2); @@ -533,8 +532,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (RoundInstr->getOpcode() == PPC::FRSP && MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) { Simplified = true; - unsigned ConvReg1 = RoundInstr->getOperand(1).getReg(); - unsigned FRSPDefines = RoundInstr->getOperand(0).getReg(); + Register ConvReg1 = RoundInstr->getOperand(1).getReg(); + Register FRSPDefines = RoundInstr->getOperand(0).getReg(); MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines)); for (int i = 0, e = Use.getNumOperands(); i < e; ++i) if (Use.getOperand(i).isReg() && @@ -566,8 +565,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSH8: case PPC::EXTSH8_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(NarrowReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -610,8 +609,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSW_32: case PPC::EXTSW_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(NarrowReg)) 
break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -652,8 +651,8 @@ bool PPCMIPeephole::simplifyCode(void) { // We can eliminate EXTSW if the input is known to be already // sign-extended. LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); - unsigned TmpReg = - MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + Register TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), TmpReg); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), @@ -679,8 +678,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (MI.getOperand(2).getImm() != 0) break; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -695,8 +694,8 @@ bool PPCMIPeephole::simplifyCode(void) { SrcMI = SubRegMI; if (SubRegMI->getOpcode() == PPC::COPY) { - unsigned CopyReg = SubRegMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + Register CopyReg = SubRegMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(CopyReg)) SrcMI = MRI->getVRegDef(CopyReg); } @@ -757,7 +756,7 @@ bool PPCMIPeephole::simplifyCode(void) { break; // We don't have an ADD fed by LI's that can be transformed // Now we know that Op1 is the PHI node and Op2 is the dominator - unsigned DominatorReg = Op2.getReg(); + Register DominatorReg = Op2.getReg(); const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8 ? &PPC::G8RC_and_G8RC_NOX0RegClass @@ -927,7 +926,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1, } else if (Inst->isFullCopy()) NextReg = Inst->getOperand(1).getReg(); - if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg)) + if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg)) break; SrcReg = NextReg; } @@ -949,9 +948,8 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, (*BII).getOpcode() == PPC::BCC && (*BII).getOperand(1).isReg()) { // We optimize only if the condition code is used only by one BCC. - unsigned CndReg = (*BII).getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(CndReg) || - !MRI->hasOneNonDBGUse(CndReg)) + Register CndReg = (*BII).getOperand(1).getReg(); + if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg)) return false; MachineInstr *CMPI = MRI->getVRegDef(CndReg); @@ -961,7 +959,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, // We skip this BB if a physical register is used in comparison. for (MachineOperand &MO : CMPI->operands()) - if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) return false; return true; @@ -1271,8 +1269,8 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { // We touch up the compare instruction in MBB2 and move it to // a previous BB to handle partially redundant case. 
if (SwapOperands) { - unsigned Op1 = CMPI2->getOperand(1).getReg(); - unsigned Op2 = CMPI2->getOperand(2).getReg(); + Register Op1 = CMPI2->getOperand(1).getReg(); + Register Op2 = CMPI2->getOperand(2).getReg(); CMPI2->getOperand(1).setReg(Op2); CMPI2->getOperand(2).setReg(Op1); } @@ -1295,7 +1293,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2)); DebugLoc DL = CMPI2->getDebugLoc(); - unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); + Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); BuildMI(MBB2, MBB2.begin(), DL, TII->get(PPC::PHI), NewVReg) .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1) @@ -1334,8 +1332,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { if (MI.getOpcode() != PPC::RLDICR) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1414,8 +1412,8 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (SHMI + MEMI != 63) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1428,6 +1426,12 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (!MRI->hasOneNonDBGUse(SrcReg)) return false; + assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands"); + assert(SrcMI->getOperand(1).isReg() && + "EXTSW's second operand should be a register"); + if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) + return false; + LLVM_DEBUG(dbgs() << "Combining pair: "); LLVM_DEBUG(SrcMI->dump()); LLVM_DEBUG(MI.dump()); diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index d83c92276800..b1c0433641dd 100644 --- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -57,6 +57,109 @@ namespace { MachineFunctionProperties::Property::NoVRegs); } + // This function removes any redundant load immediates. It has two levels + // of loops: the outer loop finds a load immediate BBI that can be used to + // replace later redundant ones; the inner loop scans the instructions + // after BBI to find that redundancy and updates kill/dead flags + // accordingly. If AfterBBI loads the same immediate into the same register + // as BBI, it is redundant; otherwise, any instruction that modifies the + // def register of BBI ends the scan. + // DeadOrKillToUnset is a pointer to the previous operand that had the + // kill/dead flag set. It keeps track of the def register of BBI, the use + // registers of the AfterBBIs, and the def registers of the AfterBBIs. + bool removeRedundantLIs(MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; + MBB.dump(); dbgs() << "\n"); + + DenseSet<MachineInstr *> InstrsToErase; + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Skip a load immediate that is already marked to be erased later; it + // cannot be used to replace any other instructions. + if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) + continue; + // Skip anything that is not a load immediate.
+ unsigned Opc = BBI->getOpcode(); + if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && + Opc != PPC::LIS8) + continue; + // Skip a load immediate whose operand is a relocation (e.g., $r3 = + // LI target-flags(ppc-lo) %const.0). + if (!BBI->getOperand(1).isImm()) + continue; + assert(BBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + + LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); + + Register Reg = BBI->getOperand(0).getReg(); + int64_t Imm = BBI->getOperand(1).getImm(); + MachineOperand *DeadOrKillToUnset = nullptr; + if (BBI->getOperand(0).isDead()) { + DeadOrKillToUnset = &BBI->getOperand(0); + LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset + << " from load immediate " << *BBI + << " is an unsetting candidate\n"); + } + // This loop scans instructions after BBI to see if there is any + // redundant load immediate. + for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); + ++AfterBBI) { + // Track the operand that kills Reg. We unset the kill flag of that + // operand if a redundant load immediate follows. + int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); + if (KillIdx != -1) { + assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); + DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); + LLVM_DEBUG(dbgs() + << " Kill flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is an unsetting candidate\n"); + } + + if (!AfterBBI->modifiesRegister(Reg, TRI)) + continue; + // Finish scanning because Reg is overwritten by a non-load + // instruction. + if (AfterBBI->getOpcode() != Opc) + break; + assert(AfterBBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + // Finish scanning because Reg is overwritten by a relocation or a + // different value. + if (!AfterBBI->getOperand(1).isImm() || + AfterBBI->getOperand(1).getImm() != Imm) + break; + + // It loads the same immediate value into the same Reg, which is + // redundant. We first unset the kill flag on the previous use of Reg + // to extend its live range, then remove the redundancy. + if (DeadOrKillToUnset) { + LLVM_DEBUG(dbgs() + << " Unset dead/kill flag of " << *DeadOrKillToUnset + << " from " << *DeadOrKillToUnset->getParent()); + if (DeadOrKillToUnset->isDef()) + DeadOrKillToUnset->setIsDead(false); + else + DeadOrKillToUnset->setIsKill(false); + } + DeadOrKillToUnset = + AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); + if (DeadOrKillToUnset) + LLVM_DEBUG(dbgs() + << " Dead flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is an unsetting candidate\n"); + InstrsToErase.insert(&*AfterBBI); + LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; + AfterBBI->dump()); + } + } + + for (MachineInstr *MI : InstrsToErase) { + MI->eraseFromParent(); + } + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) return false; @@ -65,6 +168,7 @@ namespace { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); SmallVector<MachineInstr *, 4> InstrsToErase; for (MachineBasicBlock &MBB : MF) { + Changed |= removeRedundantLIs(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); // Detect self copies - these can result from running AADB.
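Condensing the scan in removeRedundantLIs above: a later load immediate is erasable when it writes the same immediate to the same register via the same opcode, with no intervening redefinition; the kill/dead-flag updates just keep the now-longer live range consistent. A toy model of the erase decision (stand-in types, not LLVM's, and flag handling deliberately omitted):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Each Inst models "Def = <Op> Imm" for some load-immediate flavor.
struct Inst {
  int Op;       // which load-immediate flavor (LI, LI8, LIS, ...)
  int Def;      // register defined
  int64_t Imm;  // immediate loaded
  bool IsLI;    // is this a load immediate at all?
};

// Return indices of load immediates that are redundant with an earlier one:
// same flavor, same register, same immediate, no intervening write to the
// register.
std::vector<std::size_t> findRedundantLIs(const std::vector<Inst> &Block) {
  std::vector<std::size_t> Erase;
  std::vector<bool> Marked(Block.size(), false);
  for (std::size_t I = 0; I < Block.size(); ++I) {
    if (!Block[I].IsLI || Marked[I])
      continue;
    for (std::size_t J = I + 1; J < Block.size(); ++J) {
      if (Block[J].Def != Block[I].Def)
        continue;  // does not redefine the register; keep scanning
      if (!Block[J].IsLI || Block[J].Op != Block[I].Op ||
          Block[J].Imm != Block[I].Imm)
        break;     // register overwritten with something else; stop
      Marked[J] = true;  // same reg, same imm: redundant
      Erase.push_back(J);
    }
  }
  return Erase;
}
```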
@@ -111,7 +215,7 @@ namespace { if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) continue; MachineInstr *CRSetMI = nullptr; - unsigned CRBit = Br->getOperand(0).getReg(); + Register CRBit = Br->getOperand(0).getReg(); unsigned CRReg = getCRFromCRBit(CRBit); bool SeenUse = false; MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp index 3a83cc27439c..6e9042643820 100644 --- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp +++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { for (auto SI = Splats.begin(); SI != Splats.end();) { MachineInstr *SMI = *SI; - unsigned SplatReg = SMI->getOperand(0).getReg(); - unsigned SrcReg = SMI->getOperand(1).getReg(); + Register SplatReg = SMI->getOperand(0).getReg(); + Register SrcReg = SMI->getOperand(1).getReg(); if (MI->modifiesRegister(SrcReg, TRI)) { switch (MI->getOpcode()) { @@ -102,7 +102,7 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { // the QPX splat source register. unsigned SubRegIndex = TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); - unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); + Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); // Substitute both the explicit defined register, and also the // implicit def of the containing QPX register. diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 8eaa6dfe2bf7..3b71ed219c17 100644 --- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -381,10 +381,10 @@ private: const MachineBranchProbabilityInfo *MBPI; // A vector to contain all the CR logical operations - std::vector<CRLogicalOpInfo> AllCRLogicalOps; + SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps; void initialize(MachineFunction &MFParm); void collectCRLogicals(); - bool handleCROp(CRLogicalOpInfo &CRI); + bool handleCROp(unsigned Idx); bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); static bool isCRLogical(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -398,7 +398,7 @@ private: // Not using a range-based for loop here as the vector may grow while being // operated on. for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) - Changed |= handleCROp(AllCRLogicalOps[i]); + Changed |= handleCROp(i); return Changed; } @@ -535,15 +535,15 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, unsigned &Subreg, MachineInstr *&CpDef) { Subreg = -1; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; MachineInstr *Copy = MRI->getVRegDef(Reg); CpDef = Copy; if (!Copy->isCopy()) return Copy; - unsigned CopySrc = Copy->getOperand(1).getReg(); + Register CopySrc = Copy->getOperand(1).getReg(); Subreg = Copy->getOperand(1).getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) { + if (!Register::isVirtualRegister(CopySrc)) { const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); // Set the Subreg if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) @@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { /// a unary CR logical might be used to change the condition code on a /// comparison feeding it. A nullary CR logical might simply be removable /// if the user of the bit it [un]sets can be transformed. 
-bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) { +bool PPCReduceCRLogicals::handleCROp(unsigned Idx) { // We can definitely split a block on the inputs to a binary CR operation // whose defs and (single) use are within the same block. bool Changed = false; + CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && CRI.DefsSingleUse) { Changed = splitBlockOnBinaryCROp(CRI); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 12554ea8d079..9ec26a19bdaa 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -325,13 +325,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool IsPositionIndependent = TM.isPositionIndependent(); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R29); else markSuperRegs(Reserved, PPC::R30); } - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -391,7 +391,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!TM.isPPC64()) @@ -425,7 +425,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::G8RC_NOX0RegClassID: case PPC::GPRC_NOR0RegClassID: case PPC::SPERCRegClassID: - case PPC::SPE4RCRegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -527,7 +526,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { if (LP64) @@ -549,7 +548,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { } bool KillNegSizeReg = MI.getOperand(1).isKill(); - unsigned NegSizeReg = MI.getOperand(1).getReg(); + Register NegSizeReg = MI.getOperand(1).getReg(); // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. @@ -655,8 +654,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. 
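The handleCROp change above — taking an index and copying the CRLogicalOpInfo out of AllCRLogicalOps rather than holding a reference — matters because, as the nearby comment notes, the vector may grow while an element is being operated on, and growth can reallocate and invalidate references. The hazard and the fix in miniature (plain C++, independent of LLVM):

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Ops = {1, 2, 3};
  // Hazard: a reference into the vector dangles if processing appends
  // and the vector reallocates.
  //   int &Cur = Ops[0];
  //   Ops.push_back(4);   // may reallocate
  //   use(Cur);           // dangling reference
  //
  // Fix (what handleCROp now does): index into the vector and copy the
  // element out before any operation that can grow it.
  for (std::size_t Idx = 0; Idx < Ops.size(); ++Idx) {
    int Cur = Ops[Idx];    // copy, immune to reallocation
    if (Cur == 2)
      Ops.push_back(4);    // growth keeps Idx and Cur valid
  }
  std::printf("%zu ops\n", Ops.size());
  return 0;
}
```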
@@ -700,8 +699,8 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -744,8 +743,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // Search up the BB to find the definition of the CR bit. MachineBasicBlock::reverse_iterator Ins; @@ -823,8 +822,8 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CRBIT does not define its destination"); @@ -833,7 +832,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); - unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::MFOCRF8 : PPC::MFOCRF), RegO) .addReg(getCRFromCRBit(DestReg)); @@ -870,8 +869,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register SrcReg = MI.getOperand(0).getReg(); BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); @@ -896,8 +895,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_VRSAVE does not define its destination"); @@ -1128,7 +1127,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); + Register StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index af0dff6347a6..4719e947b172 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -253,15 +253,14 @@ def RM: PPCReg<"**ROUNDING MODE**">; /// Register classes // Allocate volatiles first // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 -def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), - (sequence "R%u", 30, 13), - R31, R0, R1, FP, BP)> { +def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12), + (sequence "R%u", 30, 13), + R31, R0, R1, FP, BP)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. let AltOrders = [(add (sub GPRC, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); }]; } @@ -272,21 +271,19 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), // put it at the end of the list. let AltOrders = [(add (sub G8RC, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); }]; } // For some instructions r0 is special (representing the value 0 instead of // the value in the r0 register), and we use these register subclasses to // prevent r0 from being allocated for use by those instructions. -def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> { +def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. 
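Each AltOrderSelect body in these register-class hunks collapses the repeated "SVR4 and 64-bit" test into the named subtarget predicate that this patch adds to PPCSubtarget.h, and getReservedRegs above uses the 32-bit counterpart the same way. A small sketch of the equivalence; SubtargetLike is an illustrative stand-in for PPCSubtarget, not code from the patch:

    #include <cassert>

    // Before, repeated in every AltOrderSelect fragment:
    //   const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
    //   return S.isPPC64() && S.isSVR4ABI();
    // After, one named predicate shared by all four classes:
    //   return MF.getSubtarget<PPCSubtarget>().is64BitELFABI();
    struct SubtargetLike {
      bool PPC64 = true, SVR4 = true;
      bool isPPC64() const { return PPC64; }
      bool isSVR4ABI() const { return SVR4; }
      bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
      bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
    };

    int main() {
      SubtargetLike S;
      assert(S.is64BitELFABI() == (S.isPPC64() && S.isSVR4ABI()));
      assert(!S.is32BitELFABI());
    }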
let AltOrders = [(add (sub GPRC_NOR0, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); }]; } @@ -295,8 +292,7 @@ def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> { // put it at the end of the list. let AltOrders = [(add (sub G8RC_NOX0, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); }]; } @@ -304,8 +300,6 @@ def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12), (sequence "S%u", 30, 13), S31, S0, S1)>; -def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>; - // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the // allocated non-volatile register with the lowest register number, as FP diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 6aa7528634d3..10568ed4b655 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -60,7 +60,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { - StackAlignment = 16; + StackAlignment = Align(16); DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; @@ -145,7 +145,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; - if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || + if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) || + TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || TargetTriple.isMusl()) SecurePlt = true; @@ -228,18 +229,13 @@ bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } -unsigned char -PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const { - // Note that currently we don't generate non-pic references. - // If a caller wants that, this will have to be updated. - +bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; - + return true; if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) - return PPCII::MO_PIC_FLAG; - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; + return false; + return true; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 55fec1cb6d99..d96c2893aee9 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,7 +78,7 @@ protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned StackAlignment; + Align StackAlignment; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -166,7 +166,7 @@ public: /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
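The StackAlignment member changing from unsigned to Align (and Align(16) replacing the bare 16 in initializeEnvironment above) is part of a tree-wide move to llvm/Support/Alignment.h, which turns "this is a non-zero power-of-two alignment" from a convention into a compile-time invariant. A simplified sketch of what the type guarantees, assuming only standard C++; the real class offers much more:

    #include <cassert>
    #include <cstdint>

    // Simplified model of llvm::Align: store log2(alignment), so every value
    // the type can hold is a non-zero power of two by construction.
    class Align {
      uint8_t ShiftValue = 0;  // alignment == 1 << ShiftValue
    public:
      explicit Align(uint64_t Value) {
        assert(Value > 0 && (Value & (Value - 1)) == 0 && "not a power of 2");
        while (Value > 1) {
          Value >>= 1;
          ++ShiftValue;
        }
      }
      uint64_t value() const { return uint64_t(1) << ShiftValue; }
    };

    int main() {
      Align StackAlignment(16);          // mirrors StackAlignment = Align(16)
      assert(StackAlignment.value() == 16);
      // Align Broken(24);               // would assert: not a power of two
    }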
- unsigned getStackAlignment() const { return StackAlignment; } + Align getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. /// @@ -210,7 +210,11 @@ public: /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } // useSoftFloat - Return true if soft-float option is turned on. - bool useSoftFloat() const { return !HasHardFloat; } + bool useSoftFloat() const { + if (isAIXABI() && !HasHardFloat) + report_fatal_error("soft-float is not yet supported on AIX."); + return !HasHardFloat; + } /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if @@ -277,11 +281,11 @@ public: bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } - unsigned getPlatformStackAlignment() const { + Align getPlatformStackAlignment() const { if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) - return 32; + return Align(32); - return 16; + return Align(16); } // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no @@ -316,6 +320,9 @@ public: bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); } bool isELFv2ABI() const; + bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } + bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. bool enableEarlyIfConversion() const override { return true; } @@ -337,9 +344,8 @@ public: bool enableSubRegLiveness() const override; - /// classifyGlobalReference - Classify a global variable reference for the - /// current subtarget accourding to how we should reference it. - unsigned char classifyGlobalReference(const GlobalValue *GV) const; + /// True if the GV will be accessed via an indirect symbol. + bool isGVIndirectSymbol(const GlobalValue *GV) const; bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index fb826c4a32f1..8f313d9d01c4 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -74,8 +74,8 @@ protected: LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); - unsigned OutReg = MI.getOperand(0).getReg(); - unsigned InReg = MI.getOperand(1).getReg(); + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3; unsigned Opc1, Opc2; diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 3eb0569fb955..895ae6744421 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -95,7 +95,8 @@ namespace { protected: bool hasTOCLoReloc(const MachineInstr &MI) { if (MI.getOpcode() == PPC::LDtocL || - MI.getOpcode() == PPC::ADDItocL) + MI.getOpcode() == PPC::ADDItocL || + MI.getOpcode() == PPC::LWZtocL) return true; for (const MachineOperand &MO : MI.operands()) { @@ -109,11 +110,15 @@ protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + const bool isPPC64 = + MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64(); + const unsigned TOCReg = isPPC64 ? 
PPC::X2 : PPC::R2; + for (auto &MI : MBB) { if (!hasTOCLoReloc(MI)) continue; - MI.addOperand(MachineOperand::CreateReg(PPC::X2, + MI.addOperand(MachineOperand::CreateReg(TOCReg, false /*IsDef*/, true /*IsImp*/)); Changed = true; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ce00f848dd72..abefee8b339d 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner", static cl::opt<bool> ReduceCRLogical("ppc-reduce-cr-logicals", cl::desc("Expand eligible cr-logical binary ops to branches"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); @@ -185,12 +185,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { - // If it isn't a Mach-O file then it's going to be a linux ELF - // object file. if (TT.isOSDarwin()) - return llvm::make_unique<TargetLoweringObjectFileMachO>(); + return std::make_unique<TargetLoweringObjectFileMachO>(); + + if (TT.isOSAIX()) + return std::make_unique<TargetLoweringObjectFileXCOFF>(); - return llvm::make_unique<PPC64LinuxTargetObjectFile>(); + return std::make_unique<PPC64LinuxTargetObjectFile>(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, @@ -248,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } - if (!TT.isOSDarwin() && !JIT && - (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) - return CodeModel::Medium; - return CodeModel::Small; + + if (JIT) + return CodeModel::Small; + if (TT.isOSAIX()) + return CodeModel::Small; + + assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based."); + + if (TT.isArch32Bit()) + return CodeModel::Small; + + assert(TT.isArch64Bit() && "Unsupported PPC architecture."); + return CodeModel::Medium; } @@ -259,8 +269,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? - llvm::make_unique<PPCPreRASchedStrategy>(C) : - llvm::make_unique<GenericScheduler>(C)); + std::make_unique<PPCPreRASchedStrategy>(C) : + std::make_unique<GenericScheduler>(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; @@ -271,8 +281,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? - llvm::make_unique<PPCPostRASchedStrategy>(C) : - llvm::make_unique<PostGenericScheduler>(C), true); + std::make_unique<PPCPostRASchedStrategy>(C) : + std::make_unique<PostGenericScheduler>(C), true); // add DAG Mutations here. return DAG; } @@ -328,7 +338,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique<PPCSubtarget>( + I = std::make_unique<PPCSubtarget>( TargetTriple, CPU, // FIXME: It would be good to have the subtarget additions here // not necessary. 
Anything that turns them on/off (overrides) ends diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index ff3dfbfaca05..f51300c656aa 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } -unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { - if (Vector && !ST->hasAltivec() && !ST->hasQPX()) - return 0; - return ST->hasVSX() ? 64 : 32; +unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + assert(ClassID == GPRRC || ClassID == FPRRC || + ClassID == VRRC || ClassID == VSXRC); + if (ST->hasVSX()) { + assert(ClassID == GPRRC || ClassID == VSXRC); + return ClassID == GPRRC ? 32 : 64; + } + assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC); + return 32; +} + +unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { + if (Vector) + return ST->hasVSX() ? VSXRC : VRRC; + else if (Ty && Ty->getScalarType()->isFloatTy()) + return ST->hasVSX() ? VSXRC : FPRRC; + else + return GPRRC; +} + +const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const { + + switch (ClassID) { + default: + llvm_unreachable("unknown register class"); + return "PPC::unknown register class"; + case GPRRC: return "PPC::GPRRC"; + case FPRRC: return "PPC::FPRRC"; + case VRRC: return "PPC::VRRC"; + case VSXRC: return "PPC::VSXRC"; + } } unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { @@ -613,7 +640,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { } -unsigned PPCTTIImpl::getCacheLineSize() { +unsigned PPCTTIImpl::getCacheLineSize() const { // Check first if the user specified a custom line size. if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; @@ -628,7 +655,7 @@ unsigned PPCTTIImpl::getCacheLineSize() { return 64; } -unsigned PPCTTIImpl::getPrefetchDistance() { +unsigned PPCTTIImpl::getPrefetchDistance() const { // This seems like a reasonable default for the BG/Q (this pass is enabled, by // default, only on the BG/Q). return 300; @@ -752,6 +779,35 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return 0; return Cost; + + } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) { + if (ST->hasP9Altivec()) { + if (ISD == ISD::INSERT_VECTOR_ELT) + // A move-to VSR and a permute/insert. Assume vector operation cost + // for both (cost will be 2x on P9). + return vectorCostAdjustment(2, Opcode, Val, nullptr); + + // It's an extract. Maybe we can do a cheap move-from VSR. + unsigned EltSize = Val->getScalarSizeInBits(); + if (EltSize == 64) { + unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0; + if (Index == MfvsrdIndex) + return 1; + } else if (EltSize == 32) { + unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1; + if (Index == MfvsrwzIndex) + return 1; + } + + // We need a vector extract (or mfvsrld). Assume vector operation cost. + // The cost of the load constant for a vector extract is disregarded + // (invariant, easily schedulable). + return vectorCostAdjustment(1, Opcode, Val, nullptr); + + } else if (ST->hasDirectMove()) + // Assume permute has standard cost. + // Assume move-to/move-from VSR have 2x standard cost. + return 3; } // Estimated cost of a load-hit-store delay. 
This was obtained diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 5d76ee418b69..83a70364bf68 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -72,10 +72,16 @@ public: TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); - unsigned getNumberOfRegisters(bool Vector); + + enum PPCRegisterClass { + GPRRC, FPRRC, VRRC, VSXRC + }; + unsigned getNumberOfRegisters(unsigned ClassID) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + const char* getRegisterClassName(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize(); - unsigned getPrefetchDistance(); + unsigned getCacheLineSize() const override; + unsigned getPrefetchDistance() const override; unsigned getMaxInterleaveFactor(unsigned VF); int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 719ed7b63878..3463bbbdc5f0 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -50,7 +50,7 @@ namespace { bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { return RC->hasSubClassEq(MRI.getRegClass(Reg)); } else if (RC->contains(Reg)) { return true; @@ -102,7 +102,7 @@ protected: IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); - unsigned NewVReg = MRI.createVirtualRegister(SrcRC); + Register NewVReg = MRI.createVirtualRegister(SrcRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) .addImm(1) // add 1, not 0, because there is no implicit clearing @@ -124,7 +124,7 @@ protected: "Unknown destination for a VSX copy"); // Copy the VSX value into a new VSX register of the correct subclass. - unsigned NewVReg = MRI.createVirtualRegister(DstRC); + Register NewVReg = MRI.createVirtualRegister(DstRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg) .add(SrcMO); diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index ce78239df0a8..5e150be544ed 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -126,8 +126,8 @@ protected: if (!AddendMI->isFullCopy()) continue; - unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { + Register AddendSrcReg = AddendMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(AddendSrcReg)) { if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != MRI.getRegClass(AddendSrcReg)) continue; @@ -182,12 +182,12 @@ protected: // %5 = A-form-op %5, %5, %11; // where %5 and %11 are both kills. This case would be skipped // otherwise. - unsigned OldFMAReg = MI.getOperand(0).getReg(); + Register OldFMAReg = MI.getOperand(0).getReg(); // Find one of the product operands that is killed by this instruction. 
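The TTI hunks above replace the single boolean getNumberOfRegisters(bool Vector) with a register-class-aware query, so register-pressure heuristics can distinguish GPRs, FPRs, VRs, and VSX registers instead of seeing one undifferentiated pool. A hedged sketch of how a caller might combine the new hooks; budgetFor is a hypothetical helper, not part of the patch:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Type.h"

    // Hypothetical pressure heuristic: ask which class Ty would occupy on
    // this subtarget, then how many registers that class provides.
    static unsigned budgetFor(const llvm::TargetTransformInfo &TTI,
                              llvm::Type *Ty, bool Vector) {
      unsigned ClassID = TTI.getRegisterClassForType(Vector, Ty);
      // Per the patch, a VSX subtarget reports 64 registers for VSXRC and 32
      // for GPRRC; getRegisterClassName(ClassID) is available for debug output.
      return TTI.getNumberOfRegisters(ClassID);
    }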
unsigned KilledProdOp = 0, OtherProdOp = 0; - unsigned Reg2 = MI.getOperand(2).getReg(); - unsigned Reg3 = MI.getOperand(3).getReg(); + Register Reg2 = MI.getOperand(2).getReg(); + Register Reg3 = MI.getOperand(3).getReg(); if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() && Reg2 != OldFMAReg) { KilledProdOp = 2; @@ -208,14 +208,14 @@ protected: // legality checks above, the live range for the addend source register // could be extended), but it seems likely that such a trivial copy can // be coalesced away later, and thus is not worth the effort. - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + if (Register::isVirtualRegister(AddendSrcReg) && !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. - unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg(); - unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg(); + Register KilledProdReg = MI.getOperand(KilledProdOp).getReg(); + Register OtherProdReg = MI.getOperand(OtherProdOp).getReg(); unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg(); @@ -314,7 +314,7 @@ protected: // Extend the live interval of the addend source (it might end at the // copy to be removed, or somewhere in between there and here). This // is necessary only if it is a physical register. - if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) + if (!Register::isVirtualRegister(AddendSrcReg)) for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 44175af7f9b6..c3729da0b07b 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -158,7 +158,7 @@ private: // Return true iff the given register is in the given class. bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return RC->hasSubClassEq(MRI->getRegClass(Reg)); return RC->contains(Reg); } @@ -253,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; @@ -566,7 +566,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) { + if (!Register::isVirtualRegister(CopySrcReg)) { if (!isScalarVecReg(CopySrcReg)) SwapVector[VecIdx].MentionsPhysVR = 1; return CopySrcReg; @@ -601,11 +601,11 @@ void PPCVSXSwapRemoval::formWebs() { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (!(MI->isCopy() && isScalarVecReg(Reg))) SwapVector[EntryIdx].MentionsPhysVR = 1; continue; @@ -667,7 +667,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // than a swap instruction. else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); // We skip debug instructions in the analysis. 
(Note that debug // location information is still maintained by this optimization @@ -695,9 +695,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // other than a swap instruction. } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); int DefIdx = SwapMap[DefMI]; if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad || @@ -756,7 +756,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) { int UseIdx = SwapMap[&UseMI]; @@ -772,7 +772,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); int DefIdx = SwapMap[DefMI]; SwapVector[DefIdx].WillRemove = 1; @@ -869,8 +869,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { Selector = 3 - Selector; MI->getOperand(3).setImm(Selector); - unsigned Reg1 = MI->getOperand(1).getReg(); - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); MI->getOperand(1).setReg(Reg2); MI->getOperand(2).setReg(Reg1); @@ -894,9 +894,9 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); LLVM_DEBUG(MI->dump()); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); MI->getOperand(0).setReg(NewVReg); LLVM_DEBUG(dbgs() << " Into: "); @@ -910,8 +910,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { // prior to the swap, and from VSRC to VRRC following the swap. // Coalescing will usually remove all this mess. if (DstRC == &PPC::VRRCRegClass) { - unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); - unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), TII->get(PPC::COPY), VSRCTmp1) |
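Several of the swap-removal hunks above walk users through MRI->use_nodbg_instructions, so that debug-only uses (DBG_VALUE) can never change an optimization decision and -g builds stay bit-identical. A minimal sketch of that pattern with the new Register type; markUsers and its parameters are illustrative, shaped like the markSwapsForRemoval loops rather than copied from them:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallBitVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    using namespace llvm;

    // Mark the web entry of every non-debug user of DefReg. Debug
    // instructions are skipped by the iterator itself, so no explicit
    // isDebugInstr() test is needed in the loop body.
    static void markUsers(MachineRegisterInfo &MRI, Register DefReg,
                          DenseMap<MachineInstr *, int> &SwapMap,
                          SmallBitVector &WillRemove) {
      for (MachineInstr &UseMI : MRI.use_nodbg_instructions(DefReg))
        WillRemove.set(SwapMap[&UseMI]);
    }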