| author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 09:08:18 +0000 |
| commit | 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch) | |
| tree | f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /lib/Target/PowerPC | |
| parent | 68bcb7db193e4bc81430063148253d30a791023e (diff) | |
| download | src-5ca98fd98791947eba83a1ed3f2c8191ef7afa6c.tar.gz, src-5ca98fd98791947eba83a1ed3f2c8191ef7afa6c.zip | |
Diffstat (limited to 'lib/Target/PowerPC')
78 files changed, 11970 insertions, 5066 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt index 3aa59c00c369..408858e424d5 100644 --- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt +++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt @@ -1,8 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMPowerPCAsmParser PPCAsmParser.cpp ) - -add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt index 02ebf1d3d3ed..801f27bb7bc3 100644 --- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt +++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -19,5 +19,5 @@ type = Library name = PowerPCAsmParser parent = PowerPC -required_libraries = PowerPCDesc PowerPCInfo MC MCParser Support +required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index fe83fe1438ce..d7066d58709a 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -9,21 +9,23 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCMCExpr.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/MC/MCStreamer.h" +#include "PPCTargetStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -94,6 +96,44 @@ static unsigned VRegs[32] = { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; +static unsigned VSRegs[64] = { + PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, + PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, + PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, + PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, + PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, + PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23, + PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, + PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, + + PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, + PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, + PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, + PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, + PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, + PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, + PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, + PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 +}; +static unsigned VSFRegs[64] = { + PPC::F0, PPC::F1, 
PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static unsigned CRBITRegs[32] = { PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, @@ -177,6 +217,7 @@ class PPCAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; const MCInstrInfo &MII; bool IsPPC64; + bool IsDarwin; MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -185,30 +226,34 @@ class PPCAsmParser : public MCTargetAsmParser { bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool isPPC64() const { return IsPPC64; } + bool isDarwin() const { return IsDarwin; } bool MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; const MCExpr *ExtractModifierFromExpr(const MCExpr *E, PPCMCExpr::VariantKind &Variant); const MCExpr *FixupVariantKind(const MCExpr *E); bool ParseExpression(const MCExpr *&EVal); + bool ParseDarwinExpression(const MCExpr *&EVal); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseOperand(OperandVector &Operands); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveTC(unsigned Size, SMLoc L); bool ParseDirectiveMachine(SMLoc L); + bool ParseDarwinDirectiveMachine(SMLoc L); + bool ParseDirectiveAbiVersion(SMLoc L); + bool ParseDirectiveLocalEntry(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) override; - void ProcessInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + void ProcessInstruction(MCInst &Inst, const OperandVector &Ops); /// @name Auto-generated Match Functions /// { @@ -221,27 +266,29 @@ class PPCAsmParser : public MCTargetAsmParser { public: PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII) + const MCInstrInfo &_MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); + IsDarwin = TheTriple.isMacOSX(); // Initialize the set of available features. 
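
The constructor above derives both modes it needs purely from the subtarget triple: 64-bit pointer mode for ppc64/ppc64le, and Darwin syntax for Mac OS X targets. A minimal standalone restatement of those two checks (the free-function names are illustrative, not part of the commit):

```cpp
#include "llvm/ADT/Triple.h"

// 64-bit pointer mode, as tested in the PPCAsmParser constructor.
static bool isPPC64Triple(const llvm::Triple &T) {
  return T.getArch() == llvm::Triple::ppc64 ||
         T.getArch() == llvm::Triple::ppc64le;
}

// Darwin (Mach-O) assembler syntax selection.
static bool isDarwinTriple(const llvm::Triple &T) {
  return T.isMacOSX();
}
```
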
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - virtual bool ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; - virtual bool ParseDirective(AsmToken DirectiveID); + bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; - virtual const MCExpr *applyModifierToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind, - MCContext &Ctx); + const MCExpr *applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind, + MCContext &Ctx) override; }; /// PPCOperand - Instances of this class represent a parsed PowerPC machine @@ -306,10 +353,10 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } /// isPPC64 - True if this operand is for an instruction in 64-bit mode. bool isPPC64() const { return IsPPC64; } @@ -334,11 +381,16 @@ public: return TLSReg.Sym; } - unsigned getReg() const { + unsigned getReg() const override { assert(isRegNumber() && "Invalid access!"); return (unsigned) Imm.Val; } + unsigned getVSReg() const { + assert(isVSRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + unsigned getCCReg() const { assert(isCCRegNumber() && "Invalid access!"); return (unsigned) (Kind == Immediate ? 
Imm.Val : Expr.CRVal); @@ -354,8 +406,9 @@ public: return 7 - countTrailingZeros<uint64_t>(Imm.Val); } - bool isToken() const { return Kind == Token; } - bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isToken() const override { return Kind == Token; } + bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } @@ -376,6 +429,7 @@ public: (Kind == Immediate && isInt<16>(getImm()) && (getImm() & 3) == 0); } bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); } bool isCCRegNumber() const { return (Kind == Expression && isUInt<3>(getExprCRVal())) || (Kind == Immediate @@ -386,8 +440,8 @@ public: && isUInt<5>(getImm())); } bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && isPowerOf2_32(getImm()); } - bool isMem() const { return false; } - bool isReg() const { return false; } + bool isMem() const override { return false; } + bool isReg() const override { return false; } void addRegOperands(MCInst &Inst, unsigned N) const { llvm_unreachable("addRegOperands"); @@ -442,6 +496,16 @@ public: Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); } + void addRegVSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()])); + } + + void addRegVSFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); @@ -483,10 +547,11 @@ public: return StringRef(Tok.Data, Tok.Length); } - virtual void print(raw_ostream &OS) const; + void print(raw_ostream &OS) const override; - static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Token); + static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S, + bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -495,22 +560,27 @@ public: return Op; } - static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S, - bool IsPPC64) { + static std::unique_ptr<PPCOperand> + CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) { // Allocate extra memory for the string and copy it. + // FIXME: This is incorrect, Operands are owned by unique_ptr with a default + // deleter which will destroy them by simply using "delete", not correctly + // calling operator delete on this extra memory after calling the dtor + // explicitly. 
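
The FIXME above is worth unpacking: CreateTokenWithStringCopy over-allocates a single heap block so the token text can live directly behind the operand, but the operand is then handed to a unique_ptr whose default deleter only knows how to `delete` the object itself. A self-contained sketch of the pattern (type and function names are illustrative):

```cpp
#include <cstring>
#include <memory>
#include <new>
#include <string>

struct Tok {
  const char *Data;
  std::size_t Length;
};

// One allocation holds the Tok plus a copy of the string right after it.
std::unique_ptr<Tok> makeTok(const std::string &S) {
  void *Mem = ::operator new(sizeof(Tok) + S.size());
  std::unique_ptr<Tok> T(new (Mem) Tok());
  T->Data = reinterpret_cast<const char *>(T.get() + 1);
  T->Length = S.size();
  std::memcpy(const_cast<char *>(T->Data), S.data(), S.size());
  // The default deleter runs plain `delete`, which does not pair with
  // the over-sized ::operator new above -- the mismatch the FIXME notes.
  return T;
}
```
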
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size()); - PPCOperand *Op = new (Mem) PPCOperand(Token); - Op->Tok.Data = (const char *)(Op + 1); + std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token)); + Op->Tok.Data = (const char *)(Op.get() + 1); Op->Tok.Length = Str.size(); - std::memcpy((char *)(Op + 1), Str.data(), Str.size()); + std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size()); Op->StartLoc = S; Op->EndLoc = S; Op->IsPPC64 = IsPPC64; return Op; } - static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Immediate); + static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E, + bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -518,9 +588,9 @@ public: return Op; } - static PPCOperand *CreateExpr(const MCExpr *Val, - SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Expression); + static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S, + SMLoc E, bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Expression); Op->Expr.Val = Val; Op->Expr.CRVal = EvaluateCRExpr(Val); Op->StartLoc = S; @@ -529,9 +599,9 @@ public: return Op; } - static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym, - SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(TLSRegister); + static std::unique_ptr<PPCOperand> + CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) { + auto Op = make_unique<PPCOperand>(TLSRegister); Op->TLSReg.Sym = Sym; Op->StartLoc = S; Op->EndLoc = E; @@ -539,8 +609,8 @@ public: return Op; } - static PPCOperand *CreateFromMCExpr(const MCExpr *Val, - SMLoc S, SMLoc E, bool IsPPC64) { + static std::unique_ptr<PPCOperand> + CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) { if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val)) return CreateImm(CE->getValue(), S, E, IsPPC64); @@ -571,10 +641,8 @@ void PPCOperand::print(raw_ostream &OS) const { } } - -void PPCAsmParser:: -ProcessInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +void PPCAsmParser::ProcessInstruction(MCInst &Inst, + const OperandVector &Operands) { int Opcode = Inst.getOpcode(); switch (Opcode) { case PPC::LAx: { @@ -854,11 +922,10 @@ ProcessInstruction(MCInst &Inst, } } -bool PPCAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { @@ -867,7 +934,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Post-process instructions (typically extended mnemonics) ProcessInstruction(Inst, Operands); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + Out.EmitInstruction(Inst, STI); return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); @@ -879,7 +946,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -960,7 +1027,7 @@ 
ExtractModifierFromExpr(const MCExpr *E, switch (E->getKind()) { case MCExpr::Target: case MCExpr::Constant: - return 0; + return nullptr; case MCExpr::SymbolRef: { const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E); @@ -988,7 +1055,7 @@ ExtractModifierFromExpr(const MCExpr *E, Variant = PPCMCExpr::VK_PPC_HIGHESTA; break; default: - return 0; + return nullptr; } return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context); @@ -998,7 +1065,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCUnaryExpr *UE = cast<MCUnaryExpr>(E); const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); if (!Sub) - return 0; + return nullptr; return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); } @@ -1009,7 +1076,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant); if (!LHS && !RHS) - return 0; + return nullptr; if (!LHS) LHS = BE->getLHS(); if (!RHS) RHS = BE->getRHS(); @@ -1021,7 +1088,7 @@ ExtractModifierFromExpr(const MCExpr *E, else if (LHSVariant == RHSVariant) Variant = LHSVariant; else - return 0; + return nullptr; return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); } @@ -1081,10 +1148,16 @@ FixupVariantKind(const MCExpr *E) { llvm_unreachable("Invalid expression kind!"); } -/// Parse an expression. This differs from the default "parseExpression" -/// in that it handles complex \code @l/@ha \endcode modifiers. +/// ParseExpression. This differs from the default "parseExpression" in that +/// it handles modifiers. bool PPCAsmParser:: ParseExpression(const MCExpr *&EVal) { + + if (isDarwin()) + return ParseDarwinExpression(EVal); + + // (ELF Platforms) + // Handle \code @l/@ha \endcode if (getParser().parseExpression(EVal)) return true; @@ -1098,12 +1171,59 @@ ParseExpression(const MCExpr *&EVal) { return false; } +/// ParseDarwinExpression. (MachO Platforms) +/// This differs from the default "parseExpression" in that it handles detection +/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present, +/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO +/// syntax form so it is done here. TODO: Determine if there is merit in arranging +/// for this to be done at a higher level. bool PPCAsmParser:: -ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ParseDarwinExpression(const MCExpr *&EVal) { + PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None; + switch (getLexer().getKind()) { + default: + break; + case AsmToken::Identifier: + // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus + // something starting with any other char should be part of the + // asm syntax. If handwritten asm includes an identifier like lo16, + // then all bets are off - but no-one would do that, right? 
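
The recognition that follows is a plain case-insensitive keyword match before falling back to the generic expression parser. A standalone sketch, with a stand-in `Modifier` enum in place of `PPCMCExpr::VariantKind`:

```cpp
#include "llvm/ADT/StringRef.h"

enum class Modifier { None, Lo16, Hi16, Ha16 };

// Mirrors the equals_lower() chain in ParseDarwinExpression: lo16(),
// hi16() and ha16() select a relocation modifier; anything else is
// treated as an ordinary identifier.
static Modifier classifyDarwinModifier(llvm::StringRef Tok) {
  if (Tok.equals_lower("lo16")) return Modifier::Lo16;
  if (Tok.equals_lower("hi16")) return Modifier::Hi16;
  if (Tok.equals_lower("ha16")) return Modifier::Ha16;
  return Modifier::None;
}
```
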
+ StringRef poss = Parser.getTok().getString(); + if (poss.equals_lower("lo16")) { + Variant = PPCMCExpr::VK_PPC_LO; + } else if (poss.equals_lower("hi16")) { + Variant = PPCMCExpr::VK_PPC_HI; + } else if (poss.equals_lower("ha16")) { + Variant = PPCMCExpr::VK_PPC_HA; + } + if (Variant != PPCMCExpr::VK_PPC_None) { + Parser.Lex(); // Eat the xx16 + if (getLexer().isNot(AsmToken::LParen)) + return Error(Parser.getTok().getLoc(), "expected '('"); + Parser.Lex(); // Eat the '(' + } + break; + } + + if (getParser().parseExpression(EVal)) + return true; + + if (Variant != PPCMCExpr::VK_PPC_None) { + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + Parser.Lex(); // Eat the ')' + EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext()); + } + return false; +} + +/// ParseOperand +/// This handles registers in the form 'NN', '%rNN' for ELF platforms and +/// rNN for MachO. +bool PPCAsmParser::ParseOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); const MCExpr *EVal; - PPCOperand *Op; // Attempt to parse the next token as an immediate switch (getLexer().getKind()) { @@ -1115,20 +1235,35 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { int64_t IntVal; if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { Parser.Lex(); // Eat the identifier token. - Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); return false; } return Error(S, "invalid register name"); + case AsmToken::Identifier: + // Note that non-register-name identifiers from the compiler will begin + // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include + // identifiers like r31foo - so we fall through in the event that parsing + // a register name fails. + if (isDarwin()) { + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); + return false; + } + } + // Fall-through to process non-register-name identifiers as expression. // All other expressions case AsmToken::LParen: case AsmToken::Plus: case AsmToken::Minus: case AsmToken::Integer: - case AsmToken::Identifier: case AsmToken::Dot: case AsmToken::Dollar: + case AsmToken::Exclaim: + case AsmToken::Tilde: if (!ParseExpression(EVal)) break; /* fall through */ @@ -1137,8 +1272,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Push the parsed operand into the list of operands - Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64())); // Check whether this is a TLS call expression bool TLSCall = false; @@ -1157,8 +1291,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. 
- Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64())); } // Otherwise, check for D-form memory operands @@ -1177,11 +1310,25 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { break; case AsmToken::Integer: - if (getParser().parseAbsoluteExpression(IntVal) || + if (!isDarwin()) { + if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 || IntVal > 31) return Error(S, "invalid register number"); + } else { + return Error(S, "unexpected integer value"); + } break; + case AsmToken::Identifier: + if (isDarwin()) { + unsigned RegNo; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + break; + } + } + // Fall-through.. + default: return Error(S, "invalid memory operand"); } @@ -1191,17 +1338,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. - Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); } return false; } /// Parse an instruction mnemonic followed by its operands. -bool PPCAsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { // The first operand is the token for the instruction name. // If the next character is a '+' or '-', we need to add it to the // instruction name, to match what TableGen is doing. @@ -1261,14 +1406,23 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, /// ParseDirective parses the PPC specific directives bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - if (IDVal == ".llong") - return ParseDirectiveWord(8, DirectiveID.getLoc()); - if (IDVal == ".tc") - return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); - if (IDVal == ".machine") - return ParseDirectiveMachine(DirectiveID.getLoc()); + if (!isDarwin()) { + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + if (IDVal == ".llong") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 
8 : 4, DirectiveID.getLoc()); + if (IDVal == ".machine") + return ParseDirectiveMachine(DirectiveID.getLoc()); + if (IDVal == ".abiversion") + return ParseDirectiveAbiVersion(DirectiveID.getLoc()); + if (IDVal == ".localentry") + return ParseDirectiveLocalEntry(DirectiveID.getLoc()); + } else { + if (IDVal == ".machine") + return ParseDarwinDirectiveMachine(DirectiveID.getLoc()); + } return true; } @@ -1279,7 +1433,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { for (;;) { const MCExpr *Value; if (getParser().parseExpression(Value)) - return true; + return false; getParser().getStreamer().EmitValue(Value, Size); @@ -1303,8 +1457,10 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { while (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().isNot(AsmToken::Comma)) Parser.Lex(); - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::Comma)) { + Error(L, "unexpected token in directive"); + return false; + } Parser.Lex(); // Align to word size. @@ -1314,12 +1470,14 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { return ParseDirectiveWord(Size, L); } -/// ParseDirectiveMachine +/// ParseDirectiveMachine (ELF platforms) /// ::= .machine [ cpu | "push" | "pop" ] bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { if (getLexer().isNot(AsmToken::Identifier) && - getLexer().isNot(AsmToken::String)) - return Error(L, "unexpected token in directive"); + getLexer().isNot(AsmToken::String)) { + Error(L, "unexpected token in directive"); + return false; + } StringRef CPU = Parser.getTok().getIdentifier(); Parser.Lex(); @@ -1329,15 +1487,118 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { // Implement ".machine any" (by doing nothing) for the benefit // of existing assembler code. Likewise, we can then implement // ".machine push" and ".machine pop" as no-op. - if (CPU != "any" && CPU != "push" && CPU != "pop") - return Error(L, "unrecognized machine type"); + if (CPU != "any" && CPU != "push" && CPU != "pop") { + Error(L, "unrecognized machine type"); + return false; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitMachine(CPU); + + return false; +} + +/// ParseDarwinDirectiveMachine (Mach-o platforms) +/// ::= .machine cpu-identifier +bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier) && + getLexer().isNot(AsmToken::String)) { + Error(L, "unexpected token in directive"); + return false; + } + + StringRef CPU = Parser.getTok().getIdentifier(); + Parser.Lex(); + + // FIXME: this is only the 'default' set of cpu variants. + // However we don't act on this information at present, this is simply + // allowing parsing to proceed with minimal sanity checking. 
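
The Darwin `.machine` handler that follows accepts only the default cpu identifiers and cross-checks them against the current pointer-size mode. The width check, restated as a standalone predicate (the name is illustrative):

```cpp
#include "llvm/ADT/StringRef.h"

// A 64-bit parser only accepts "ppc64"; a 32-bit parser only accepts
// the 32-bit cpu names, matching the error cases in
// ParseDarwinDirectiveMachine.
static bool machineMatchesWidth(llvm::StringRef CPU, bool IsPPC64) {
  if (IsPPC64)
    return CPU == "ppc64";
  return CPU == "ppc" || CPU == "ppc7400";
}
```
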
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") { + Error(L, "unrecognized cpu type"); + return false; + } + + if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) { + Error(L, "wrong cpu type specified for 64bit"); + return false; + } + if (!isPPC64() && CPU == "ppc64") { + Error(L, "wrong cpu type specified for 32bit"); + return false; + } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } return false; } +/// ParseDirectiveAbiVersion +/// ::= .abiversion constant-expression +bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) { + int64_t AbiVersion; + if (getParser().parseAbsoluteExpression(AbiVersion)){ + Error(L, "expected constant expression"); + return false; + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitAbiVersion(AbiVersion); + + return false; +} + +/// ParseDirectiveLocalEntry +/// ::= .localentry symbol, expression +bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { + StringRef Name; + if (getParser().parseIdentifier(Name)) { + Error(L, "expected identifier in directive"); + return false; + } + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) { + Error(L, "unexpected token in directive"); + return false; + } + Lex(); + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) { + Error(L, "expected expression"); + return false; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } + + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>( + getParser().getStreamer().getTargetStreamer()); + TStreamer.emitLocalEntry(Sym, Expr); + + return false; +} + + + /// Force static initialization. extern "C" void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); @@ -1351,7 +1612,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() { // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, +unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, unsigned Kind) { // If the kind is a token for a literal immediate, check if our asm // operand matches. 
This is for InstAliases which have a fixed-value @@ -1365,8 +1626,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, default: return Match_InvalidOperand; } - PPCOperand *Op = static_cast<PPCOperand*>(AsmOp); - if (Op->isImm() && Op->getImm() == ImmVal) + PPCOperand &Op = static_cast<PPCOperand &>(AsmOp); + if (Op.isImm() && Op.getImm() == ImmVal) return Match_Success; return Match_InvalidOperand; @@ -1392,6 +1653,6 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E, case MCSymbolRefExpr::VK_PPC_HIGHESTA: return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); default: - return 0; + return nullptr; } } diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 9a763f53a2d1..ea4de63a2448 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) @@ -34,9 +35,8 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -add_dependencies(LLVMPowerPCCodeGen PowerPCCommonTableGen intrinsics_gen) - add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt new file mode 100644 index 000000000000..ca457df88d3e --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMPowerPCDisassembler + PPCDisassembler.cpp + ) diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt new file mode 100644 index 000000000000..b0978c227ae9 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCDisassembler +parent = PowerPC +required_libraries = MC PowerPCInfo Support +add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/Disassembler/Makefile b/lib/Target/PowerPC/Disassembler/Makefile new file mode 100644 index 000000000000..86e3b4752207 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMPowerPCDisassembler + +# Hack: we need to include 'main' PPC target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp new file mode 100644 index 000000000000..a2305a9efc71 --- /dev/null +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -0,0 +1,348 @@ +//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-disassembler" + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class PPCDisassembler : public MCDisassembler { +public: + PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} + virtual ~PPCDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const override; +}; +} // end anonymous namespace + +static MCDisassembler *createPPCDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new PPCDisassembler(STI, Ctx); +} + +extern "C" void LLVMInitializePowerPCDisassembler() { + // Register the disassembler for each target. + TargetRegistry::RegisterMCDisassembler(ThePPC32Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget, + createPPCDisassembler); +} + +// FIXME: These can be generated by TableGen from the existing register +// encoding values! 
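
All of the tables below serve one purpose: map a raw encoding index onto a register enum, with the array length checked via a reference-to-array parameter. The lookup they feed appears further down as `decodeRegisterClass`; isolated, it is just:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Bounds-checked encoding -> register lookup; N is deduced from the
// table, so each register class gets its own check for free.
template <std::size_t N>
static unsigned lookupReg(uint64_t RegNo, const unsigned (&Regs)[N]) {
  assert(RegNo < N && "Invalid register number");
  return Regs[RegNo];
}
```
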
+ +static const unsigned CRRegs[] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +static const unsigned CRBITRegs[] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; + +static const unsigned FRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; + +static const unsigned VRegs[] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +static const unsigned VSRegs[] = { + PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3, + PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7, + PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11, + PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15, + PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19, + PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23, + PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27, + PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31, + + PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3, + PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, + PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, + PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15, + PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19, + PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23, + PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27, + PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31 +}; + +static const unsigned VSFRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31, + + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + +static const unsigned GPRegs[] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned GP0Regs[] = { + PPC::ZERO, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + 
PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned G8Regs[] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; + +template <std::size_t N> +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned (&Regs)[N]) { + assert(RegNo < N && "Invalid register number"); + Inst.addOperand(MCOperand::CreateReg(Regs[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + +static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRBITRegs); +} + +static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VRegs); +} + +static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSRegs); +} + +static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VSFRegs); +} + +static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GPRegs); +} + +static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GP0Regs); +} + +static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, G8Regs); +} + +#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass +#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass + +template<unsigned N> +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + +template<unsigned N> +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt<N>(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memri field (imm, reg), which has the low 16-bits as the + // displacement and the next 5 bits as the register #. 
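
A worked restatement of that bit split, matching the implementation that follows (the helper name is illustrative):

```cpp
#include <cassert>
#include <cstdint>

// The memri field packs a 5-bit base register above a 16-bit
// displacement; the displacement is signed, hence the sign extension
// (SignExtend64<16> in the decoder itself).
static void splitMemRI(uint64_t Imm, uint64_t &Base, int64_t &Disp) {
  Base = Imm >> 16;                          // next 5 bits: base register
  Disp = static_cast<int16_t>(Imm & 0xFFFF); // low 16 bits, signed
  assert(Base < 32 && "Invalid base register");
}
```

The DS-form variant handled by `decodeMemRIXOperands` is analogous, except the displacement field is 14 bits and is shifted left by two before sign extension.
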
+ + uint64_t Base = Imm >> 16; + uint64_t Disp = Imm & 0xFFFF; + + assert(Base < 32 && "Invalid base register"); + + switch (Inst.getOpcode()) { + default: break; + case PPC::LBZU: + case PPC::LHAU: + case PPC::LHZU: + case PPC::LWZU: + case PPC::LFSU: + case PPC::LFDU: + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + break; + case PPC::STBU: + case PPC::STHU: + case PPC::STWU: + case PPC::STFSU: + case PPC::STFDU: + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + break; + } + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memrix field (imm, reg), which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + + uint64_t Base = Imm >> 14; + uint64_t Disp = Imm & 0x3FFF; + + assert(Base < 32 && "Invalid base register"); + + if (Inst.getOpcode() == PPC::LDU) + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + else if (Inst.getOpcode() == PPC::STDU) + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // The cr bit encoding is 0x80 >> cr_reg_num. + + unsigned Zeros = countTrailingZeros(Imm); + assert(Zeros < 8 && "Invalid CR bit value"); + + Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros])); + return MCDisassembler::Success; +} + +#include "PPCGenDisassemblerTables.inc" + +DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the four bytes of the instruction. + uint8_t Bytes[4]; + Size = 4; + if (Region.readBytes(Address, Size, Bytes) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // The instruction is big-endian encoded. + uint32_t Inst = (Bytes[0] << 24) | + (Bytes[1] << 16) | + (Bytes[2] << 8) | + (Bytes[3] << 0); + + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); +} + diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt index a605cc4b5f27..ab30a110f40e 100644 --- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMPowerPCAsmPrinter PPCInstPrinter.cpp ) - -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 8281b5ca03c8..7279b091b34c 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + // FIXME: Once the integrated assembler supports full register names, tie this // to the verbose-asm setting. static cl::opt<bool> @@ -149,6 +150,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU: O << "nu"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } @@ -184,6 +188,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, case PPC::PRED_NU_PLUS: O << "+"; return; + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } @@ -193,6 +200,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 3 && "Invalid u2imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); @@ -310,7 +324,10 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': - case 'v': return RegName + 1; + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8a4c03d64540..211a62813e7a 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -31,8 +31,8 @@ public: return IsDarwin; } - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; // Autogenerated by tblgen. 
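
Among the InstPrinter changes above, `stripRegisterPrefix` is extended so the new VSX register names print correctly when register prefixes are suppressed: "vs" joins "cr" as a two-character prefix. Assembled as a standalone function, the post-commit logic reads as below (the trailing fall-through return is assumed from context, not shown in the hunk):

```cpp
// "r3" -> "3", "f1" -> "1", "v2" -> "2", "vs31" -> "31", "cr7" -> "7";
// anything unrecognized is returned unchanged.
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2;
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}
```
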
void printInstruction(const MCInst *MI, raw_ostream &O); @@ -41,9 +41,9 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier = 0); - + raw_ostream &O, const char *Modifier = nullptr); + void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 7b3e843507a8..9d173d64b944 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -16,18 +16,20 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = PowerPC parent = Target +has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] type = Library name = PowerPCCodeGen parent = PowerPC -required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index 3efa5ecf9096..3cea65ee4de6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,3 @@ add_llvm_library(LLVMPowerPCDesc PPCMachObjectWriter.cpp PPCELFObjectWriter.cpp ) - -add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 0d420815ea62..c54d5e75bdfd 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -9,7 +9,9 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" @@ -71,14 +73,18 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { namespace { class PPCAsmBackend : public MCAsmBackend { -const Target &TheTarget; + const Target &TheTarget; + bool IsLittleEndian; public: - PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} + PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T), + IsLittleEndian(isLittle) {} - unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } + unsigned getNumFixupKinds() const override { + return PPC::NumTargetFixupKinds; + } - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = { + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, @@ -88,17 +94,27 @@ 
public: { "fixup_ppc_half16ds", 0, 14, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; + const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = { + // name offset bits flags + { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_br24abs", 2, 24, 0 }, + { "fixup_ppc_brcond14abs", 2, 14, 0 }, + { "fixup_ppc_half16", 0, 16, 0 }, + { "fixup_ppc_half16ds", 2, 14, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } + }; if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; + return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { + uint64_t Value, bool IsPCRel) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -108,11 +124,37 @@ public: // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i); + Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff); + } + } + + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override { + switch ((PPC::Fixups)Fixup.getKind()) { + default: break; + case PPC::fixup_ppc_br24: + case PPC::fixup_ppc_br24abs: + // If the target symbol has a local entry point we must not attempt + // to resolve the fixup directly. Emit a relocation and leave + // resolution of the final target address to the linker. + if (const MCSymbolRefExpr *A = Target.getSymA()) { + const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol()); + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. + unsigned Other = MCELF::getOther(Data) << 2; + if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) + IsResolved = false; + } + break; + } } - bool mayNeedRelaxation(const MCInst &Inst) const { + bool mayNeedRelaxation(const MCInst &Inst) const override { // FIXME. return false; } @@ -120,18 +162,18 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { + const MCAsmLayout &Layout) const override { // FIXME. llvm_unreachable("relaxInstruction() unimplemented"); } - void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. 
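
The reworked `applyFixup` above carries the bulk of the little-endian enablement: the same masking loop now walks the fixup bytes from either end depending on target endianness. Isolated for clarity:

```cpp
#include <cstdint>

// Mask `Value` into NumBytes bytes at Data[Offset]:
// most-significant byte first on big-endian targets,
// least-significant byte first on little-endian ones.
static void patchFixup(char *Data, unsigned Offset, unsigned NumBytes,
                       uint64_t Value, bool IsLittleEndian) {
  for (unsigned i = 0; i != NumBytes; ++i) {
    unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
    Data[Offset + i] |= static_cast<uint8_t>((Value >> (Idx * 8)) & 0xff);
  }
}
```

Note that the corresponding `InfosLE` fixup table above also moves the bit offsets of the branch fixups from the high end of the instruction word to the low end.
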
llvm_unreachable("relaxInstruction() unimplemented"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(0x60000000); @@ -152,6 +194,10 @@ public: assert(Name == "ppc32" && "Unknown target name!"); return 4; } + + bool isLittleEndian() const { + return IsLittleEndian; + } }; } // end anonymous namespace @@ -160,9 +206,9 @@ public: namespace { class DarwinPPCAsmBackend : public PPCAsmBackend { public: - DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } + DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -170,26 +216,18 @@ namespace { (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC), MachO::CPU_SUBTYPE_POWERPC_ALL); } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; - } }; class ELFPPCAsmBackend : public PPCAsmBackend { uint8_t OSABI; public: - ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : - PPCAsmBackend(T), OSABI(OSABI) { } + ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : + PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { bool is64 = getPointerSize() == 8; - return createPPCELFObjectWriter(OS, is64, OSABI); - } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; + return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } }; @@ -202,5 +240,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, return new DarwinPPCAsmBackend(T); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFPPCAsmBackend(T, OSABI); + bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le; + return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 54de70eff71a..e93e95fc0751 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -9,7 +9,9 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" @@ -27,17 +29,11 @@ namespace { virtual unsigned getRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend) const; - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const; - virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const; + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; + + bool needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const override; }; } @@ -49,12 +45,39 @@ 
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) PPCELFObjectWriter::~PPCELFObjectWriter() { } +static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target, + const MCFixup &Fixup) { + const MCExpr *Expr = Fixup.getValue(); + + if (Expr->getKind() != MCExpr::Target) + return Target.getAccessVariant(); + + switch (cast<PPCMCExpr>(Expr)->getKind()) { + case PPCMCExpr::VK_PPC_None: + return MCSymbolRefExpr::VK_None; + case PPCMCExpr::VK_PPC_LO: + return MCSymbolRefExpr::VK_PPC_LO; + case PPCMCExpr::VK_PPC_HI: + return MCSymbolRefExpr::VK_PPC_HI; + case PPCMCExpr::VK_PPC_HA: + return MCSymbolRefExpr::VK_PPC_HA; + case PPCMCExpr::VK_PPC_HIGHERA: + return MCSymbolRefExpr::VK_PPC_HIGHERA; + case PPCMCExpr::VK_PPC_HIGHER: + return MCSymbolRefExpr::VK_PPC_HIGHER; + case PPCMCExpr::VK_PPC_HIGHEST: + return MCSymbolRefExpr::VK_PPC_HIGHEST; + case PPCMCExpr::VK_PPC_HIGHESTA: + return MCSymbolRefExpr::VK_PPC_HIGHESTA; + } + llvm_unreachable("unknown PPCMCExpr kind"); +} + unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? - MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup); // determine the type of the relocation unsigned Type; @@ -64,7 +87,15 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: - Type = ELF::R_PPC_REL24; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_REL24; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_PPC_PLTREL24; + break; + } break; case PPC::fixup_ppc_brcond14: case PPC::fixup_ppc_brcond14abs: @@ -356,64 +387,31 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { + bool IsPCRel) const { return getRelocTypeInner(Target, Fixup, IsPCRel); } -const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const { - assert(Target.getSymA() && "SymA cannot be 0"); - MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? - MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); - - bool EmitThisSym; - switch (Modifier) { - // GOT references always need a relocation, even if the - // target symbol is local. - case MCSymbolRefExpr::VK_GOT: - case MCSymbolRefExpr::VK_PPC_GOT_LO: - case MCSymbolRefExpr::VK_PPC_GOT_HI: - case MCSymbolRefExpr::VK_PPC_GOT_HA: - EmitThisSym = true; - break; - default: - EmitThisSym = false; - break; - } - - if (EmitThisSym) - return &Target.getSymA()->getSymbol().AliasedSymbol(); - return NULL; -} - -const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - assert(Target.getSymA() && "SymA cannot be 0"); - const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); - - unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel); - - // The .odp creation emits a relocation against the symbol ".TOC." which - // create a R_PPC64_TOC relocation. 
However the relocation symbol name - // in final object creation should be NULL, since the symbol does not - // really exist, it is just the reference to TOC base for the current - // object file. - bool EmitThisSym = RelocType != ELF::R_PPC64_TOC; +bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const { + switch (Type) { + default: + return false; - if (EmitThisSym && !Symbol.isTemporary()) - return &Symbol; - return NULL; + case ELF::R_PPC_REL24: + // If the target symbol has a local entry point, we must keep the + // target symbol to preserve that information for the linker. + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. + unsigned Other = MCELF::getOther(SD) << 2; + return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0; + } } MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 1d9c06406a4b..b95a2ac13e04 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -18,24 +18,6 @@ using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } -/// This version of the constructor is here to maintain ABI compatibility with -/// LLVM 3.4.0 -PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { - if (is64Bit) { - PointerSize = CalleeSaveStackSlotSize = 8; - } - IsLittleEndian = false; - - CommentString = ";"; - ExceptionsType = ExceptionHandling::DwarfCFI; - - if (!is64Bit) - Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. - - AssemblerDialect = 1; // New-Style mnemonics. - SupportsDebugInformation= true; // Debug information. -} - PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; @@ -46,32 +28,32 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { ExceptionsType = ExceptionHandling::DwarfCFI; if (!is64Bit) - Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode. AssemblerDialect = 1; // New-Style mnemonics. SupportsDebugInformation= true; // Debug information. - // old assembler lacks some directives + // The installed assembler for OSX < 10.6 lacks some directives. // FIXME: this should really be a check on the assembler characteristics // rather than OS version if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) HasWeakDefCanBeHiddenDirective = false; + + UseIntegratedAssembler = true; } void PPCLinuxMCAsmInfo::anchor() { } -PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { +PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } - IsLittleEndian = false; + IsLittleEndian = T.getArch() == Triple::ppc64le; // ".comm align is in bytes but .align is pow-2." 
AlignmentIsInBytes = false; CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; // Uses '.section' before '.bss' directive UsesELFSectionDirectiveForBSS = true; @@ -89,7 +71,12 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ExceptionsType = ExceptionHandling::DwarfCFI; ZeroDirective = "\t.space\t"; - Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; + Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr; AssemblerDialect = 1; // New-Style mnemonics. + + if (T.getOS() == llvm::Triple::FreeBSD || + (T.getOS() == llvm::Triple::NetBSD && !is64Bit) || + (T.getOS() == llvm::Triple::OpenBSD && !is64Bit)) + UseIntegratedAssembler = true; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 633970ccc289..754330b2c60f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -21,18 +21,15 @@ namespace llvm { class Triple; class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); + void anchor() override; public: - /// This version of the constructor is here to maintain ABI compatibility - /// with LLVM 3.4.0. - explicit PPCMCAsmInfoDarwin(bool is64Bit); explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; class PPCLinuxMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: - explicit PPCLinuxMCAsmInfo(bool is64Bit); + explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&); }; } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 346a9beada90..435a93f78c1d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { @@ -33,70 +34,113 @@ class PPCMCCodeEmitter : public MCCodeEmitter { PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; - const MCSubtargetInfo &STI; + const MCInstrInfo &MCII; const MCContext &CTX; - Triple TT; + bool IsLittleEndian; public: - PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) { + PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle) + : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) { } ~PPCMCCodeEmitter() {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const 
MCSubtargetInfo &STI) const; unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override { // For fast-isel, a float COPY_TO_REGCLASS can survive this long. // It's just a nop to keep the register classes happy, so don't // generate anything. unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MCII.get(Opcode); if (Opcode == TargetOpcode::COPY_TO_REGCLASS) return; - uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - // BL8_NOP etc. all have a size of 8 because of the following 'nop'. - unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! - if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || - Opcode == PPC::BL8_NOP_TLS) - Size = 8; - - // Output the constant in big endian byte order. - int ShiftValue = (Size * 8) - 8; - for (unsigned i = 0; i != Size; ++i) { - OS << (char)(Bits >> ShiftValue); - Bits <<= 8; + // Output the constant in big/little endian byte order. + unsigned Size = Desc.getSize(); + switch (Size) { + case 4: + if (IsLittleEndian) { + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); + } + break; + case 8: + // If we emit a pair of instructions, the first one is + // always in the top 32 bits, even on little-endian. 
+ if (IsLittleEndian) { + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 56); + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 56); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); + } + break; + default: + llvm_unreachable ("Invalid instruction size"); } ++MCNumEmitted; // Keep track of the # of mi's emitted. @@ -110,14 +154,17 @@ MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new PPCMCCodeEmitter(MCII, STI, Ctx); + Triple TT(STI.getTargetTriple()); + bool IsLittleEndian = TT.getArch() == Triple::ppc64le; + return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian); } unsigned PPCMCCodeEmitter:: getDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -126,9 +173,10 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, } unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -138,9 +186,10 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -150,9 +199,10 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the branch target. 
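Note the byte-order rules in the EncodeInstruction hunk above: a 4-byte word is emitted byte-reversed on little-endian, while an 8-byte encoding is a pair of instructions whose first word always sits in the top 32 bits of Bits, so only the bytes within each word are swapped, never the two words themselves. A self-contained sketch of that logic (helper names are illustrative, not from the patch):

#include <cstdint>
#include <ostream>

// Emit one 32-bit instruction word in target byte order.
static void emitWord(std::ostream &OS, uint32_t W, bool IsLittleEndian) {
  for (unsigned i = 0; i != 4; ++i) {
    unsigned Shift = IsLittleEndian ? 8 * i : 8 * (3 - i);
    OS.put(char((W >> Shift) & 0xff));
  }
}

// An 8-byte encoding is two instructions; the first one is always in the
// top 32 bits, even on little-endian, so the word order never changes.
static void emitPair(std::ostream &OS, uint64_t Bits, bool IsLittleEndian) {
  emitWord(OS, uint32_t(Bits >> 32), IsLittleEndian);
  emitWord(OS, uint32_t(Bits), IsLittleEndian);
}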
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), @@ -161,79 +211,87 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, } unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the immediate field. - Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); return 0; } unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Encode (imm, reg) as a memri, which has the low 16-bits as the // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16; + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16; const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; + return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16)); return RegBits; } unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Encode (imm, reg) as a memrix, which has the low 14-bits as the // displacement and the next 5 bits as the register #. assert(MI.getOperand(OpNo+1).isReg()); - unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14; + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14; const MCOperand &MO = MI.getOperand(OpNo); if (MO.isImm()) - return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits; + return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits; // Add a fixup for the displacement field. - Fixups.push_back(MCFixup::Create(2, MO.getExpr(), + Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_half16ds)); return RegBits; } unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups); + if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI); // Add a fixup for the TLS register, which simply provides a relocation // hint to the linker that this statement is part of a relocation sequence. // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); - return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); + Triple TT(STI.getTargetTriple()); + bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le; + return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? 
PPC::X13 : PPC::R2); } unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // For special TLS calls, we need two fixups; one for the branch target // (__tls_get_addr), which we create via getDirectBrEncoding as usual, // and one for the TLSGD or TLSLD symbol, which is emitted here. const MCOperand &MO = MI.getOperand(OpNo+1); Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); - return getDirectBrEncoding(MI, OpNo, Fixups); + return getDirectBrEncoding(MI, OpNo, Fixups, STI); } unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && @@ -244,7 +302,8 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, unsigned PPCMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { if (MO.isReg()) { // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index d7e84021595e..3ac0aca6b78c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,14 +7,16 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcmcexpr" #include "PPCMCExpr.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectStreamer.h" using namespace llvm; +#define DEBUG_TYPE "ppcmcexpr" + const PPCMCExpr* PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { @@ -54,7 +56,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const { MCValue Value; - if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout)) + if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout)) return false; if (Value.isAbsolute()) { @@ -86,6 +88,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, } Res = MCValue::get(Result); } else { + if (!Layout) + return false; + MCContext &Context = Layout->getAssembler().getContext(); const MCSymbolRefExpr *Sym = Value.getSymA(); MCSymbolRefExpr::VariantKind Modifier = Sym->getKind(); @@ -123,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, return true; } -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? 
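A related endianness detail from the getImm16Encoding, getMemRIEncoding and getMemRIXEncoding hunks above: the fixup offset passed to MCFixup::Create changes from 2 to 0 on little-endian. The 16-bit (or 14-bit DS-form) immediate is the low half of the 4-byte instruction word, and that half lands at byte offset 2 in big-endian memory but at byte offset 0 in little-endian memory. A one-line sketch of the rule (hypothetical helper, not from the patch):

// Byte offset of a d-form immediate field in memory: the low 16 bits of
// the word occupy bytes 2..3 on big-endian but bytes 0..1 on
// little-endian, hence the (IsLittleEndian ? 0 : 2) above.
static unsigned half16FixupOffset(bool IsLittleEndian) {
  return IsLittleEndian ? 0 : 2;
}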
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols_(BE->getLHS(), Asm); - AddValueSymbols_(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbols_(getSubExpr(), Asm); +void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index e44c7c1adc67..bca408507e72 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -10,9 +10,9 @@ #ifndef PPCMCEXPR_H #define PPCMCEXPR_H +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { @@ -76,16 +76,16 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { + const MCAsmLayout *Layout) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } // There are no TLS PPCMCExprs at the moment. - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 6a5051840181..4c6780ff75a7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -16,16 +16,22 @@ #include "PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" #include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "PPCGenInstrInfo.inc" @@ -35,10 +41,9 @@ #define GET_REGINFO_MC_DESC #include "PPCGenRegisterInfo.inc" -using namespace llvm; - // Pin the vtable to this file. 
PPCTargetStreamer::~PPCTargetStreamer() {} +PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} static MCInstrInfo *createPPCMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); @@ -74,12 +79,12 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { if (TheTriple.isOSDarwin()) MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else - MAI = new PPCLinuxMCAsmInfo(isPPC64); + MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple); // Initial state of the frame pointer is R1. unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0); MAI->addInitialFrameState(Inst); return MAI; @@ -112,20 +117,90 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer { formatted_raw_ostream &OS; public: - PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {} - virtual void emitTCEntry(const MCSymbol &S) { + PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) + : PPCTargetStreamer(S), OS(OS) {} + void emitTCEntry(const MCSymbol &S) override { OS << "\t.tc "; OS << S.getName(); OS << "[TC],"; OS << S.getName(); OS << '\n'; } + void emitMachine(StringRef CPU) override { + OS << "\t.machine " << CPU << '\n'; + } + virtual void emitAbiVersion(int AbiVersion) override { + OS << "\t.abiversion " << AbiVersion << '\n'; + } + virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) { + OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n'; + } }; class PPCTargetELFStreamer : public PPCTargetStreamer { - virtual void emitTCEntry(const MCSymbol &S) { +public: + PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + MCELFStreamer &getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); + } + virtual void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation - Streamer->EmitSymbolValue(&S, 8); + Streamer.EmitSymbolValue(&S, 8); + } + void emitMachine(StringRef CPU) override { + // FIXME: Is there anything to do in here or does this directive only + // limit the parser? + } + virtual void emitAbiVersion(int AbiVersion) override { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags &= ~ELF::EF_PPC64_ABI; + Flags |= (AbiVersion & ELF::EF_PPC64_ABI); + MCA.setELFHeaderEFlags(Flags); + } + virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) { + MCAssembler &MCA = getStreamer().getAssembler(); + MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S); + + int64_t Res; + if (!LocalOffset->EvaluateAsAbsolute(Res, MCA)) + report_fatal_error(".localentry expression must be absolute."); + + unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); + if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) + report_fatal_error(".localentry expression cannot be encoded."); + + // The "other" values are stored in the last 6 bits of the second byte. + // The traditional defines for STO values assume the full byte and thus + // the shift to pack it. + unsigned Other = MCELF::getOther(Data) << 2; + Other &= ~ELF::STO_PPC64_LOCAL_MASK; + Other |= Encoded; + MCELF::setOther(Data, Other >> 2); + + // For GAS compatibility, unless we already saw a .abiversion directive, + // set e_flags to indicate ELFv2 ABI. 
+ unsigned Flags = MCA.getELFHeaderEFlags(); + if ((Flags & ELF::EF_PPC64_ABI) == 0) + MCA.setELFHeaderEFlags(Flags | 2); + } +}; + +class PPCTargetMachOStreamer : public PPCTargetStreamer { +public: + PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} + void emitTCEntry(const MCSymbol &S) override { + llvm_unreachable("Unknown pseudo-op: .tc"); + } + void emitMachine(StringRef CPU) override { + // FIXME: We should update the CPUType, CPUSubType in the Object file if + // the new values are different from the defaults. + } + virtual void emitAbiVersion(int AbiVersion) override { + llvm_unreachable("Unknown pseudo-op: .abiversion"); + } + virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) { + llvm_unreachable("Unknown pseudo-op: .localentry"); } }; } @@ -135,25 +210,31 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, bool NoExecStack) { - if (Triple(TT).isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + if (Triple(TT).isOSDarwin()) { + MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + new PPCTargetMachOStreamer(*S); + return S; + } - PPCTargetStreamer *S = new PPCTargetELFStreamer(); - return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack); + MCStreamer *S = + createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + new PPCTargetELFStreamer(*S); + return S; } static MCStreamer * createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, bool useCFI, - bool useDwarfDirectory, MCInstPrinter *InstPrint, - MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS); - - return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, - useDwarfDirectory, InstPrint, CE, TAB, - ShowInst); + bool isVerboseAsm, bool useDwarfDirectory, + MCInstPrinter *InstPrint, MCCodeEmitter *CE, + MCAsmBackend *TAB, bool ShowInst) { + + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); + new PPCTargetAsmStreamer(*S, OS); + return S; } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 0b0ca241e26b..474395b93637 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -46,6 +46,7 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, /// createPPCELFObjectWriter - Construct an PPC ELF object writer. MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI); /// createPPCELFObjectWriter - Construct a PPC Mach-O object writer. 
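The .localentry support in PPCTargetELFStreamer above, like the earlier processFixupValue and needsRelocateWithSymbol hunks, relies on one packing convention: the ELFv2 local-entry code lives in the top bits of the symbol's st_other byte (STO_PPC64_LOCAL_MASK, 0xe0 per the ELFv2 ABI), while MC stores only the top six bits of st_other, shifted down by two, hence the paired shifts. A self-contained sketch, assuming that mask value and using illustrative names:

#include <cstdint>

static const unsigned kLocalMask = 0xe0; // assumed: ELF::STO_PPC64_LOCAL_MASK

// Pack an already-encoded local-entry code into MC's 6-bit st_other form,
// mirroring emitLocalEntry above: widen, clear the old code, OR in the
// new one, then narrow again.
static uint8_t packLocalEntry(uint8_t McOther, unsigned EncodedCode) {
  unsigned Other = McOther << 2;  // recover the full st_other byte
  Other &= ~kLocalMask;           // drop any previous local-entry code
  Other |= EncodedCode;           // EncodedCode is already mask-aligned
  return uint8_t(Other >> 2);     // back to MC's shifted representation
}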
MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index bbafe2e78955..cff27baeb5ee 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -44,7 +44,7 @@ public: void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { + uint64_t &FixedValue) override { if (Writer->is64Bit()) { report_fatal_error("Relocation emission for MachO/PPC64 unimplemented."); } else @@ -206,7 +206,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) report_fatal_error("symbol '" + A->getName() + @@ -219,7 +219,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + @@ -324,7 +324,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // this doesn't seem right for RIT_PPC_BR24 // Get the symbol data, if any. - MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 63facc5446d3..c2987b641c04 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -42,6 +42,10 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS; case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS; case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS; + + // Simple predicates for single condition-register bits. + case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET; + case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET; } llvm_unreachable("Unknown PPC branch opcode!"); } @@ -72,6 +76,10 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS; case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS; case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS; + + case PPC::PRED_BIT_SET: + case PPC::PRED_BIT_UNSET: + llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index d498c2f8f446..10e328a8116e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -48,7 +48,12 @@ namespace PPC { PRED_GT_PLUS = (1 << 5) | 15, PRED_NE_PLUS = (2 << 5) | 7, PRED_UN_PLUS = (3 << 5) | 15, - PRED_NU_PLUS = (3 << 5) | 7 + PRED_NU_PLUS = (3 << 5) | 7, + + // When dealing with individual condition-register bits, we have simple set + // and unset predicates. + PRED_BIT_SET = 1024, + PRED_BIT_UNSET = 1025 }; /// Invert the specified predicate. != -> ==, < -> >=. 
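The PPCPredicates changes above introduce two pseudo-predicates for branching on a single condition-register bit. They invert into each other, and swapping the comparison operands is meaningless for one bit, which is why getSwappedPredicate rejects them. In sketch form, with the enum values copied from the hunk:

#include <cassert>

enum Predicate { PRED_BIT_SET = 1024, PRED_BIT_UNSET = 1025 };

// Inverting a CR-bit predicate simply flips set <-> unset.
static Predicate invertBitPredicate(Predicate P) {
  assert((P == PRED_BIT_SET || P == PRED_BIT_UNSET) &&
         "not a CR-bit predicate");
  return P == PRED_BIT_SET ? PRED_BIT_UNSET : PRED_BIT_SET;
}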
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 21fdcd9350e1..c96674809b01 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ - PPCGenMCCodeEmitter.inc PPCGenFastISel.inc + PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ + PPCGenDisassemblerTables.inc -DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc +DIRS = AsmParser Disassembler InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index f0d5af24466e..ba5fa4f79b4e 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -23,6 +23,7 @@ namespace llvm { class PPCTargetMachine; + class PassRegistry; class FunctionPass; class ImmutablePass; class JITCodeEmitter; @@ -35,6 +36,9 @@ namespace llvm { FunctionPass *createPPCCTRLoopsVerify(); #endif FunctionPass *createPPCEarlyReturnPass(); + FunctionPass *createPPCVSXCopyPass(); + FunctionPass *createPPCVSXCopyCleanupPass(); + FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, @@ -45,6 +49,9 @@ namespace llvm { /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); + void initializePPCVSXFMAMutatePass(PassRegistry&); + extern char &PPCVSXFMAMutateID; + namespace PPCII { /// Target Operand Flag enum. @@ -53,10 +60,11 @@ namespace llvm { // PPC Specific MachineOperand flags. MO_NO_FLAG, - /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$stub" symbol. This is used for calls - /// and jumps to external functions on Tiger and earlier. - MO_DARWIN_STUB = 1, + /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is + /// used for calls and jumps to external functions on Tiger and earlier, and + /// for PIC calls on Linux and ELF systems. + MO_PLT_OR_STUB = 1, /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). 
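The renamed MO_PLT_OR_STUB flag above folds two lowerings of the same idea into one bit: a call through "FOO$stub" on Tiger-era Darwin and a PIC call through "FOO@plt" on ELF systems. A hypothetical sketch of how such a flag could select the printed symbol name; the per-platform suffix choice here is an assumption for illustration, not code from the patch:

#include <string>

enum { MO_NO_FLAG = 0, MO_PLT_OR_STUB = 1 }; // values from the PPC.h hunk

// Illustrative only: map an operand's symbol name through the flag.
static std::string symbolForOperand(const std::string &Name, unsigned Flag,
                                    bool IsDarwin) {
  if (Flag == MO_PLT_OR_STUB)
    return IsDarwin ? Name + "$stub" : Name + "@plt";
  return Name;
}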
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 54e3d400a9d9..a9842b287cbb 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -46,11 +46,14 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "" def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; +def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; +def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", + "Use condition-register bits individually">; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions">; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", @@ -88,7 +91,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", "Enable QPX instructions">; def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", - "Enable VSX instructions">; + "Enable VSX instructions", + [FeatureAltivec]>; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; @@ -110,6 +114,12 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", // their record-form variants. class RecFormRel; +// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX +// FMA instruction forms with their corresponding factor-killing forms. +class AltVSXFMARel { + bit IsVSXFMAAlt = 0; +} + //===----------------------------------------------------------------------===// // Relation Map Definitions. //===----------------------------------------------------------------------===// @@ -140,6 +150,19 @@ def getNonRecordFormOpcode : InstrMapping { let ValueCols = [["0"]]; } +def getAltVSXFMAOpcode : InstrMapping { + let FilterClass = "AltVSXFMARel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName"]; + // Instructions with the same RC value form a column. + let ColFields = ["IsVSXFMAAlt"]; + // The key column are the (default) addend-killing instructions. 
+ let KeyCol = ["0"]; + // Value columns IsVSXFMAAlt=1 + let ValueCols = [["1"]]; +} + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -153,12 +176,12 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, @@ -254,7 +277,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", G5Model, +def : ProcessorModel<"pwr7", P7Model, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, @@ -263,6 +286,15 @@ def : ProcessorModel<"pwr7", G5Model, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */, + [DirectivePwr8, FeatureAltivec, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, @@ -283,11 +315,11 @@ include "PPCCallingConv.td" def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; -} -def PPCAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; + // FIXME: Unset this when no longer needed! + let decodePositionallyEncodedOperands = 1; + + let noNamedPositionallyEncodedOperands = 1; } def PPCAsmParser : AsmParser { @@ -306,8 +338,7 @@ def PPCAsmParserVariant : AsmParserVariant { def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; - - let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 2d92a112d5ef..6f67c598c754 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,27 +16,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asmprinter" #include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" -#include "MCTargetDesc/PPCPredicates.h" +#include "PPCMachineFunctionInfo.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -54,12 +56,13 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "asmprinter" + namespace { class PPCAsmPrinter : public AsmPrinter { protected: @@ -71,22 +74,22 @@ namespace { : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "PowerPC Assembly Printer"; } MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); - virtual void EmitInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) override; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -95,15 +98,17 @@ namespace { explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : PPCAsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Linux PPC Assembly Printer"; } - bool doFinalization(Module &M); + bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; - virtual void EmitFunctionEntryLabel(); + void EmitFunctionEntryLabel() override; - void EmitFunctionBodyEnd(); + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -113,12 +118,12 @@ namespace { explicit 
PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : PPCAsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Darwin PPC Assembly Printer"; } - bool doFinalization(Module &M); - void EmitStartOfAsmFile(Module &M); + bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs); }; @@ -130,7 +135,10 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': - case 'v': return RegName + 1; + case 'v': + if (RegName[1] == 's') + return RegName + 2; + return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } @@ -139,6 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) { void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -157,37 +166,13 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - // FIXME: PIC relocation model - return; case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: O << *GetBlockAddressSymbol(MO.getBlockAddress()); return; - case MachineOperand::MO_ExternalSymbol: { - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() == Reloc::Static) { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - } - - MCSymbol *NLPSym = - OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ - MO.getSymbolName()+"$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); - - O << *NLPSym; - return; - } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. const GlobalValue *GV = MO.getGlobal(); @@ -197,21 +182,21 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (TM.getRelocationModel() != Reloc::Static && (GV->isDeclaration() || GV->isWeakForLinker())) { if (!GV->hasHiddenVisibility()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>() .getGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>(). 
getHiddenGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else { @@ -228,7 +213,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } default: - O << "<unknown operand type: " << MO.getType() << ">"; + O << "<unknown operand type: " << (unsigned)MO.getType() << ">"; return; } } @@ -305,13 +290,13 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - + const DataLayout *DL = TM.getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; // To avoid name clash check if the name already exists. - while (TOCEntry == 0) { - if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + - "C" + Twine(TOCLabelID++)) == 0) { + while (!TOCEntry) { + if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + + "C" + Twine(TOCLabelID++)) == nullptr) { TOCEntry = GetTempSymbol("C", TOCLabelID); } } @@ -325,6 +310,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { /// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; + bool isPPC64 = Subtarget.isPPC64(); // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { @@ -340,7 +326,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); @@ -349,6 +335,66 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::GetGBRO: { + // Get the offset from the GOT Base Register to the GOT + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + MCSymbol *PICOffset = MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(); + TmpInst.setOpcode(PPC::LWZ); + const MCExpr *Exp = + MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext); + const MCExpr *PB = + MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), + MCSymbolRefExpr::VK_None, + OutContext); + const MCOperand MO = TmpInst.getOperand(1); + TmpInst.getOperand(1) = MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, + PB, + OutContext)); + TmpInst.addOperand(MO); + EmitToStreamer(OutStreamer, TmpInst); + return; + } + case PPC::UpdateGBR: { + // Update the GOT Base Register to point to the GOT. It may be possible to + // merge this with the PPC::GetGBRO, doing it all in one step. + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + TmpInst.setOpcode(PPC::ADD4); + TmpInst.addOperand(TmpInst.getOperand(0)); + EmitToStreamer(OutStreamer, TmpInst); + return; + } + case PPC::LWZtoc: { + // Transform %X3 = LWZtoc <ga:@min1>, %X2 + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to LWZ, and the global address operand to be a + // reference to the GOT entry we will synthesize later. 
+ TmpInst.setOpcode(PPC::LWZ); + const MachineOperand &MO = MI->getOperand(1); + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = nullptr; + if (MO.isGlobal()) + MOSymbol = getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None, + OutContext); + const MCExpr *PB = + MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")), + OutContext); + Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + EmitToStreamer(OutStreamer, TmpInst); + return; + } case PPC::LDtocJTI: case PPC::LDtocCPT: case PPC::LDtoc: { @@ -362,7 +408,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Map symbol -> label of TOC entry assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; if (MO.isGlobal()) MOSymbol = getSymbol(MO.getGlobal()); else if (MO.isCPI()) @@ -376,7 +422,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } @@ -384,45 +430,42 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = ADDIStocHA %X2, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to ADDIS8. If the global address is external, - // has common linkage, is a function address, or is a jump table + // Change the opcode to ADDIS8. If the global address is external, has + // common linkage, is a non-local function address, or is a jump table // address, then generate a TOC entry and reference that. Otherwise // reference the symbol directly. TmpInst.setOpcode(PPC::ADDIS8); const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) && "Invalid operand for ADDIStocHA!"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; bool IsExternal = false; - bool IsFunction = false; + bool IsNonLocalFunction = false; bool IsCommon = false; bool IsAvailExt = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); - const GlobalValue *RealGValue = GAlias ? 
- GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && RealGValue->hasCommonLinkage(); - IsFunction = !GVar; - IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsCommon = GV->hasCommonLinkage(); + IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker()); + IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() || - TM.getCodeModel() == CodeModel::Large) + if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt || + MO.isJTI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::LDtocL: { @@ -436,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && "Invalid operand for LDtocL!"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); @@ -447,14 +490,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage() || + MOSymbol = getSymbol(GValue); + if (GValue->getType()->getElementType()->isFunctionTy() || + GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -463,7 +502,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::ADDItocL: { @@ -476,30 +515,28 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(PPC::ADDI8); const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; bool IsExternal = false; - bool IsFunction = false; + bool IsNonLocalFunction = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); - const GlobalValue *RealGValue = GAlias ? 
- GAlias->resolveAliasedGlobal(false) : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsFunction = !GVar; + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker()); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); - if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large) + if (IsNonLocalFunction || IsExternal || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case PPC::ADDISgotTprelHA: { @@ -512,18 +549,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTprel)); return; } - case PPC::LDgotTprelL: { + case PPC::LDgotTprelL: + case PPC::LDgotTprelL32: { // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD. - TmpInst.setOpcode(PPC::LD); + TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -531,7 +569,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); + return; + } + + case PPC::PPC32GOT: { + MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *SymGotTlsL = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, + OutContext); + const MCExpr *SymGotTlsHA = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, + OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsL)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsHA)); return; } case PPC::ADDIStlsgdHA: { @@ -544,7 +600,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTlsGD)); @@ -560,7 +616,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) 
.addExpr(SymGotTlsGD)); @@ -581,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -596,7 +652,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X2) .addExpr(SymGotTlsLD)); @@ -612,7 +668,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -633,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -648,7 +704,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(PPC::X3) .addExpr(SymDtprel)); @@ -664,7 +720,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -679,7 +735,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MI->getOpcode() == PPC::MFOCRF ? 
PPC::MFCR : PPC::MFCR8; OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode) .addReg(MI->getOperand(0).getReg())); return; } @@ -695,7 +751,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { ->getEncodingValue(MI->getOperand(0).getReg()); OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode) + EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode) .addImm(Mask) .addReg(MI->getOperand(1).getReg())); return; @@ -723,13 +779,76 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); +} + +void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (Subtarget.isELFv2ABI()) { + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitAbiVersion(2); + } + + if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_) + return AsmPrinter::EmitStartOfAsmFile(M); + + // FIXME: The use of .got2 assumes large GOT model (-fPIC), which is not + // optimal for some cases. We should consider supporting small model (-fpic) + // as well in the future. + assert(TM.getCodeModel() != CodeModel::Small && + "Small code model PIC is currently unsupported."); + OutStreamer.SwitchSection(OutContext.getELFSection(".got2", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly())); + + MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".L.TOC.")); + MCSymbol *CurrentPos = OutContext.CreateTempSymbol(); + + OutStreamer.EmitLabel(CurrentPos); + + // The GOT pointer points to the middle of the GOT, in order to reference the + // entire 64kB range. 0x8000 is the midpoint. + const MCExpr *tocExpr = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext), + MCConstantExpr::Create(0x8000, OutContext), + OutContext); + + OutStreamer.EmitAssignment(TOCSym, tocExpr); + + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); } void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { - if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label. + // linux/ppc32 - Normal entry label. + if (!Subtarget.isPPC64() && TM.getRelocationModel() != Reloc::PIC_) return AsmPrinter::EmitFunctionEntryLabel(); - + + if (!Subtarget.isPPC64()) { + const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>(); + if (PPCFI->usesPICBase()) { + MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(); + MCSymbol *PICBase = MF->getPICBaseSymbol(); + OutStreamer.EmitLabel(RelocSymbol); + + const MCExpr *OffsExpr = + MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")), + OutContext), + MCSymbolRefExpr::Create(PICBase, OutContext), + OutContext); + OutStreamer.EmitValue(OffsExpr, 4); + OutStreamer.EmitLabel(CurrentFnSym); + return; + } else + return AsmPrinter::EmitFunctionEntryLabel(); + } + + // ELFv2 ABI - Normal entry label. + if (Subtarget.isELFv2ABI()) + return AsmPrinter::EmitFunctionEntryLabel(); + // Emit an official procedure descriptor. 
MCSectionSubPair Current = OutStreamer.getCurrentSection(); const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", @@ -766,10 +885,17 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TD->getPointerSizeInBits() == 64; PPCTargetStreamer &TS = - static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer()); + static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer()); - if (isPPC64 && !TOC.empty()) { - const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", + if (!TOC.empty()) { + const MCSectionELF *Section; + + if (isPPC64) + Section = OutStreamer.getContext().getELFSection(".toc", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + else + Section = OutStreamer.getContext().getELFSection(".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getReadOnly()); OutStreamer.SwitchSection(Section); @@ -778,7 +904,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); - TS.emitTCEntry(*S); + if (isPPC64) + TS.emitTCEntry(*S); + else + OutStreamer.EmitSymbolValue(S, 4); } } @@ -804,6 +933,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2. +void PPCLinuxAsmPrinter::EmitFunctionBodyStart() { + // In the ELFv2 ABI, in functions that use the TOC register, we need to + // provide two entry points. The ABI guarantees that when calling the + // local entry point, r2 is set up by the caller to contain the TOC base + // for this function, and when calling the global entry point, r12 is set + // up by the caller to hold the address of the global entry point. We + // thus emit a prefix sequence along the following lines: + // + // func: + // # global entry point + // addis r2,r12,(.TOC.-func)@ha + // addi r2,r2,(.TOC.-func)@l + // .localentry func, .-func + // # local entry point, followed by function body + // + // This ensures we have r2 set up correctly while executing the function + // body, no matter which entry point is called. + if (Subtarget.isELFv2ABI() + // Only do all that if the function uses r2 in the first place. 
+ && !MF->getRegInfo().use_empty(PPC::X2)) { + + MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(GlobalEntryLabel); + const MCSymbolRefExpr *GlobalEntryLabelExp = + MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext); + + MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + const MCExpr *TOCDeltaExpr = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext), + GlobalEntryLabelExp, OutContext); + + const MCExpr *TOCDeltaHi = + PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) + .addReg(PPC::X2) + .addReg(PPC::X12) + .addExpr(TOCDeltaHi)); + + const MCExpr *TOCDeltaLo = + PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI) + .addReg(PPC::X2) + .addReg(PPC::X2) + .addExpr(TOCDeltaLo)); + + MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(LocalEntryLabel); + const MCSymbolRefExpr *LocalEntryLabelExp = + MCSymbolRefExpr::Create(LocalEntryLabel, OutContext); + const MCExpr *LocalOffsetExp = + MCBinaryExpr::CreateSub(LocalEntryLabelExp, + GlobalEntryLabelExp, OutContext); + + PPCTargetStreamer *TS = + static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer()); + + if (TS) + TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp); + } +} + /// EmitFunctionBodyEnd - Print the traceback table before the .size /// directive. /// @@ -854,13 +1045,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget.isPPC64() && Directive < PPC::DIR_64) Directive = PPC::DIR_64; assert(Directive <= PPC::DIR_64 && "Directive out of range."); - - // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) { - assert(Directive < array_lengthof(CPUDirectives) && - "CPUDirectives[] might not be up-to-date!"); - OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); - } + + assert(Directive < array_lengthof(CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); + PPCTargetStreamer &TStreamer = + *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer()); + TStreamer.emitMachine(CPUDirectives[Directive]); // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
@@ -870,14 +1060,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer.SwitchSection( OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { OutStreamer.SwitchSection( OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText())); } OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); @@ -909,8 +1099,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { if (TM.getRelocationModel() == Reloc::PIC_) { const MCSection *StubSection = OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.SwitchSection(StubSection); @@ -930,32 +1120,32 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext); // mflr r0 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); // bcl 20, 31, AnonSymbol - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon)); OutStreamer.EmitLabel(AnonSymbol); // mflr r11 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) .addExpr(SubHa16)); // mtlr r0 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(SubLo16).addExpr(SubLo16) .addReg(PPC::R11)); // mtctr r12 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); // bctr - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR)); OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); @@ -977,8 +1167,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { const MCSection *StubSection = OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_SYMBOL_STUBS | + MachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { MCSymbol *Stub = Stubs[i].first; @@ -994,7 +1184,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // lis r11, ha16(LazyPtr) const MCExpr *LazyPtrHa16 = PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS) .addReg(PPC::R11) .addExpr(LazyPtrHa16)); @@ -1002,15 +1192,15 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // lwzu r12, lo16(LazyPtr)(r11) const MCExpr *LazyPtrLo16 = PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) + EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU) .addReg(PPC::R12) .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) .addReg(PPC::R11)); // mtctr r12 - OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); // bctr - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR)); OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); @@ -1051,7 +1241,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { for (std::vector<const Function*>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { - MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMIMacho.getGVStubEntry(NLPSym); StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); @@ -1140,4 +1330,5 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, extern "C" void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 3e608ca8f679..ee906712ee02 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-branch-select" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "ppc-branch-select" + STATISTIC(NumExpanded, "Number of branches expanded to long format"); 
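The STATISTIC just added counts the rewrite this pass exists for, and which the BC/BCn hunks below extend to the new condition-register-bit branches: a conditional branch whose displacement no longer fits the signed 16-bit field is inverted and made to hop over an unconditional branch, whose 26-bit field reaches +/-32MB. A minimal sketch of the range test involved (hypothetical helper name; the pass itself sizes every basic block and iterates until no branch needs expanding):

#include <cstdint>
#include "llvm/Support/MathExtras.h"

// B-form PPC branches (bc and relatives) encode a signed 16-bit byte
// displacement, so they reach +/-32KB from the branch; I-form branches
// (b) encode a signed 26-bit displacement and reach +/-32MB.
static bool fitsInConditionalBranch(int64_t ByteOffsetToTarget) {
  return llvm::isInt<16>(ByteOffsetToTarget);
}

// When the test fails, the expansion is, schematically:
//   bcc <cond>, target          ; target out of 16-bit range
// becomes
//   bcc <inverted cond>, $+8    ; skip the next instruction
//   b   target                  ; long-format, 26-bit reach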
namespace llvm { @@ -42,9 +43,9 @@ namespace { /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "PowerPC Branch Selector"; } }; @@ -112,9 +113,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - MachineBasicBlock *Dest = 0; + MachineBasicBlock *Dest = nullptr; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); + else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && + !I->getOperand(1).isImm()) + Dest = I->getOperand(1).getMBB(); else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && !I->getOperand(0).isImm()) @@ -166,6 +170,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); + } else if (I->getOpcode() == PPC::BC) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); + } else if (I->getOpcode() == PPC::BCn) { + unsigned CRBit = I->getOperand(0).getReg(); + BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); } else if (I->getOpcode() == PPC::BDNZ8) { diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index e419b9b40d8e..ec1e34d91f93 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,31 +23,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ctrloops" - #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" +#include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "PPCTargetMachine.h" -#include "PPC.h" #ifndef NDEBUG #include "llvm/CodeGen/MachineDominators.h" @@ -61,6 +59,8 @@ using namespace llvm; +#define DEBUG_TYPE "ctrloops" + #ifndef NDEBUG static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif @@ -84,20 +84,20 @@ namespace { public: static char ID; - PPCCTRLoops() : FunctionPass(ID), TM(0) { + PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } 
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); - AU.addRequired<DominatorTree>(); - AU.addPreserved<DominatorTree>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); } @@ -109,7 +109,7 @@ namespace { PPCTargetMachine *TM; LoopInfo *LI; ScalarEvolution *SE; - DataLayout *TD; + const DataLayout *DL; DominatorTree *DT; const TargetLibraryInfo *LibInfo; }; @@ -128,12 +128,12 @@ namespace { initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: MachineDominatorTree *MDT; @@ -145,7 +145,7 @@ namespace { INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", @@ -170,8 +170,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); - DT = &getAnalysis<DominatorTree>(); - TD = getAnalysisIfAvailable<DataLayout>(); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>(); bool MadeChange = false; @@ -188,7 +189,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - return ITy->getBitWidth() > (Is32Bit ? 32 : 64); + return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); return false; } @@ -369,6 +370,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; + } else if (TT.isArch32Bit() && + isLargeIntegerTy(false, J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::Shl || + J->getOpcode() == Instruction::AShr || + J->getOpcode() == Instruction::LShr)) { + // Only on PPC32, for 128-bit integers (specifically not 64-bit + // integers), these might be runtime calls. + return true; } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { // On PowerPC, indirect jumps use the counter register. 
return true; @@ -423,9 +432,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { SmallVector<BasicBlock*, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - BasicBlock *CountedExitBlock = 0; - const SCEV *ExitCount = 0; - BranchInst *CountedExitBranch = 0; + BasicBlock *CountedExitBlock = nullptr; + const SCEV *ExitCount = nullptr; + BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index e8e7f4c2d226..222760a0cb91 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -15,6 +15,8 @@ /// CCIfSubtarget - Match if the current subtarget has a feature F. class CCIfSubtarget<string F, CCAction A> : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; +class CCIfNotSubtarget<string F, CCAction A> + : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention @@ -23,17 +25,24 @@ class CCIfSubtarget<string F, CCAction A> // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ // On PPC64, integer return values are always promoted to i64 - CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + + // Floating point types returned as "direct" go into F1 .. F8; note that + // only the ELFv2 ABI fully utilizes all these registers. + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - CCIfType<[f32], CCAssignToReg<[F1, F2]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, - - // Vector types are always returned in V2. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> + // Vector types returned as "direct" go into V2 .. V9; note that only the + // ELFv2 ABI fully utilizes all these registers. + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; @@ -46,6 +55,7 @@ def RetCC_PPC : CallingConv<[ // Only handle ints and floats. All ints are promoted to i64. // Vector types and quadword ints are not handled. def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, @@ -58,14 +68,18 @@ def CC_PPC64_ELF_FIS : CallingConv<[ // and multiple register returns are "supported" to avoid compile // errors, but none are handled by the fast selector. 
def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i64>>, CCIfType<[i8], CCPromoteToType<i64>>, CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, CCIfType<[i64], CCAssignToReg<[X3, X4]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1, F2]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> ]>; //===----------------------------------------------------------------------===// @@ -73,6 +87,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_PPC32_SVR4_Common : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i32>>, + // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, @@ -97,7 +113,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ CCIfType<[f32,f64], CCAssignToStack<8, 8>>, // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>> + CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>> ]>; // This calling convention puts vector arguments always on the stack. It is used @@ -113,6 +129,9 @@ def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. 
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, + CCIfType<[v2f64, v2i64], + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, + VSH10, VSH11, VSH12, VSH13]>>, CCDelegateTo<CC_PPC32_SVR4_Common> ]>; diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 418736e21e86..08755238f925 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -32,7 +32,7 @@ namespace { JITCodeEmitter &MCE; MachineModuleInfo *MMI; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -73,11 +73,13 @@ namespace { unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; - const char *getPassName() const { return "PowerPC Machine Code Emitter"; } + const char *getPassName() const override { + return "PowerPC Machine Code Emitter"; + } /// runOnMachineFunction - emits the given MachineFunction to memory /// - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; /// emitBasicBlock - emits the given MachineBasicBlock to memory /// @@ -102,7 +104,7 @@ bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MMI = &getAnalysis<MachineModuleInfo>(); MCE.setModuleInfo(MMI); do { - MovePCtoLROffset = 0; + MovePCtoLROffset = nullptr; MCE.startFunction(MF); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) emitBasicBlock(*BB); @@ -121,7 +123,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::CFI_INSTRUCTION: + break; case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 4e3b0b83244a..2e524d604789 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,12 +13,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcfastisel" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCISelLowering.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" @@ -28,12 +27,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -58,6 +57,8 @@ //===----------------------------------------------------------------------===// using namespace llvm; +#define DEBUG_TYPE "ppcfastisel" + namespace { typedef struct Address { @@ -80,12 +81,12 @@ typedef struct Address { } } Address; -class PPCFastISel : public FastISel { +class PPCFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const 
PPCSubtarget &PPCSubTarget; + const PPCSubtarget *PPCSubTarget; LLVMContext *Context; public: @@ -95,31 +96,29 @@ class PPCFastISel : public FastISel { TM(FuncInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - PPCSubTarget( - *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl()) - ), + PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()), Context(&FuncInfo.Fn->getContext()) { } // Backend specific FastISel code. private: - virtual bool TargetSelectInstruction(const Instruction *I); - virtual unsigned TargetMaterializeConstant(const Constant *C); - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); - virtual bool FastLowerArguments(); - virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); + bool TargetSelectInstruction(const Instruction *I) override; + unsigned TargetMaterializeConstant(const Constant *C) override; + unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool FastLowerArguments() override; + unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); // Instruction selection routines. private: @@ -127,7 +126,6 @@ class PPCFastISel : public FastISel { bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); bool SelectIToFP(const Instruction *I, bool IsSigned); @@ -283,7 +281,7 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Given a value Obj, create an Address object Addr that represents its // address. Return false if we can't handle it. 
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -325,11 +323,11 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { II != IE; ++II, ++GTI) { const Value *Op = *II; if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = TD.getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. @@ -407,7 +405,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset, // register and continue. This should almost never happen. if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(Addr.Base.FI).addImm(0); Addr.Base.Reg = ResultReg; Addr.BaseType = Address::RegBase; @@ -499,13 +497,13 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); // Base reg with offset in range. } else if (UseOffset) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. @@ -529,7 +527,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LFS: Opc = PPC::LFSX; break; case PPC::LFD: Opc = PPC::LFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -557,7 +555,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { // to constrain RA from using R0/X0 when this is not legal. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) @@ -615,12 +613,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg) - .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg) + .addImm(Addr.Offset) + .addFrameIndex(Addr.Base.FI) + .addMemOperand(MMO); // Base reg with offset in range. 
} else if (UseOffset) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg); // Indexed form. @@ -640,7 +641,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { case PPC::STFS: Opc = PPC::STFSX; break; case PPC::STFD: Opc = PPC::STFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg); } @@ -704,9 +705,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { CondReg)) return false; - BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC)) + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -714,7 +715,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DL); + FastEmitBranch(Target, DbgLoc); return true; } @@ -737,6 +738,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) + return false; + // See if operand 2 is an immediate encodeable in the compare. // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are @@ -811,10 +815,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, } if (!UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addReg(SrcReg2); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg) .addReg(SrcReg1).addImm(Imm); return true; @@ -853,7 +857,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { // Round the result to single precision. unsigned DestReg = createResultReg(&PPC::F4RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg) .addReg(SrcReg); UpdateValueMap(I, DestReg); @@ -895,7 +899,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, if (!IsSigned) { LoadOpc = PPC::LFIWZX; Addr.Offset = 4; - } else if (PPCSubTarget.hasLFIWAX()) { + } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; Addr.Offset = 4; } @@ -936,7 +940,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. - if (!IsSigned && !PPCSubTarget.hasFPCVT()) + if (!IsSigned && !PPCSubTarget->hasFPCVT()) return false; // FIXME: For now we require the newer floating-point conversion operations @@ -944,7 +948,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // to single-precision float. Otherwise we have to generate a lot of // fiddly code to avoid double rounding. If necessary, the fiddly code // can be found in PPCTargetLowering::LowerINT_TO_FP(). 
- if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT()) return false; // Extend the input if necessary. @@ -972,7 +976,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU; // Generate the convert. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(FPReg); UpdateValueMap(I, DestReg); @@ -1007,7 +1011,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, // to determine the required register class. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) @@ -1026,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. + if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) @@ -1044,7 +1052,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg); if (InRC == &PPC::F4RCRegClass) { unsigned TmpReg = createResultReg(&PPC::F8RCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg) .addReg(SrcReg).addImm(PPC::F8RCRegClassID); SrcReg = TmpReg; @@ -1059,12 +1067,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IsSigned) Opc = PPC::FCTIWZ; else - Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; // Generate the convert. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Now move the integer value from a float register to an integer register. @@ -1157,8 +1165,10 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } if (UseImm) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) - .addReg(SrcReg1).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg) + .addReg(SrcReg1) + .addImm(Imm); UpdateValueMap(I, ResultReg); return true; } @@ -1173,7 +1183,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { if (ISDOpcode == ISD::SUB) std::swap(SrcReg1, SrcReg2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2); UpdateValueMap(I, ResultReg); return true; @@ -1191,6 +1201,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, bool IsVarArg) { SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. 
+ bool isELFv2ABI = PPCSubTarget->isELFv2ABI(); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); + CCInfo.AllocateStack(LinkageSize, 8); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. @@ -1200,7 +1217,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Skip vector arguments for now, as well as long double and // uint128_t, and anything that isn't passed in a register. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 || !VA.isRegLoc() || VA.needsCustom()) return false; @@ -1212,8 +1229,16 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Get a count of how many bytes are to be pushed onto the stack. NumBytes = CCInfo.getNextStackOffset(); + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. + NumBytes = std::max(NumBytes, LinkageSize + 64); + // Issue CALLSEQ_START. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) .addImm(NumBytes); @@ -1272,9 +1297,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, ++NextGPR; } else ArgReg = NextGPR++; - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ArgReg).addReg(Arg); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg); RegArgs.push_back(ArgReg); } @@ -1287,7 +1312,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool IsVarArg) { // Issue CallSEQ_END. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameDestroyOpcode())) .addImm(NumBytes).addImm(0); @@ -1317,14 +1342,14 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT); ResultReg = createResultReg(CpyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(SourcePhysReg); // If necessary, round the floating result to single precision. } else if (CopyVT == MVT::f64) { ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), ResultReg).addReg(SourcePhysReg); // If only the low half of a general register is needed, generate @@ -1335,7 +1360,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, ResultReg = createResultReg(&PPC::GPRCRegClass); // Convert physical register from G8RC to GPRC. 
SourcePhysReg -= PPC::X0 - PPC::R0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(SourcePhysReg); } @@ -1442,7 +1467,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { if (Arg == 0) return false; - unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); Args.push_back(*II); @@ -1467,7 +1492,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) { // Build direct call with NOP for TOC restore. // FIXME: We can and should optimize away the NOP for local calls. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BL8_NOP)); // Add callee. MIB.addGlobalAddress(GV); @@ -1476,6 +1501,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) { for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II) MIB.addReg(RegArgs[II], RegState::Implicit); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. + if (PPCSubTarget->isELFv2ABI()) + MIB.addReg(PPC::X2, RegState::Implicit); + // Add a register mask with the call-preserved registers. Proper // defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(CC)); @@ -1524,8 +1553,8 @@ bool PPCFastISel::SelectRet(const Instruction *I) { const Constant *C = cast<Constant>(RV); unsigned SrcReg = PPCMaterializeInt(C, MVT::i64); unsigned RetReg = ValLocs[0].getLocReg(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - RetReg).addReg(SrcReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); RetRegs.push_back(RetReg); } else { @@ -1580,14 +1609,14 @@ bool PPCFastISel::SelectRet(const Instruction *I) { } } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetRegs[i]) .addReg(SrcReg); } } } - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BLR)); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) @@ -1617,7 +1646,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??"); Opc = PPC::EXTSW_32_64; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addReg(SrcReg); // Unsigned 32-bit extensions use RLWINM. 
@@ -1629,7 +1658,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??"); MB = 16; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31); @@ -1642,7 +1671,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, MB = 48; else MB = 32; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICL_32_64), DestReg) .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB); } @@ -1656,9 +1685,9 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8)) .addReg(AddrReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast<IndirectBrInst>(I); for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) @@ -1686,7 +1715,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) { // The only interesting case is when we need to switch register classes. if (SrcVT == MVT::i64) { unsigned ResultReg = createResultReg(&PPC::GPRCRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg, 0, PPC::sub_32); SrcReg = ResultReg; } @@ -1793,7 +1823,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { return 0; // All FP constants are loaded from the constant pool. - unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); assert(Align > 0 && "Unexpectedly missing alignment information!"); unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -1809,25 +1839,25 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)). if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT), TmpReg) .addConstantPoolIndex(Idx).addReg(PPC::X2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); // But for large code model, we must generate a LDtocL followed // by the LF[SD]. 
if (CModel == CodeModel::Large) { unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg2); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO) .addReg(TmpReg) .addMemOperand(MMO); @@ -1851,25 +1881,20 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // FIXME: Jump tables are not yet required because fast-isel doesn't // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias, use the aliasee for determining thread-locality. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); - } // FIXME: We don't yet handle the complexity of TLS. - bool IsTLS = GVar && GVar->isThreadLocal(); - if (IsTLS) + if (GV->isThreadLocal()) return 0; // For small code model, generate a simple TOC load. if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg) - .addGlobalAddress(GV).addReg(PPC::X2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc), + DestReg) + .addGlobalAddress(GV) + .addReg(PPC::X2); else { - // If the address is an externally defined symbol, a symbol with - // common or externally available linkage, a function address, or a + // If the address is an externally defined symbol, a symbol with common + // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: // LDtocL(GV, ADDIStocHA(%X2, GV)) @@ -1877,20 +1902,21 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // ADDItocL(ADDIStocHA(%X2, GV), GV) // Either way, start with the ADDIStocHA: unsigned HighPartReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // !GVar implies a function address. An external variable is one - // without an initializer. // If/when switches are implemented, jump tables should be handled // on the "if" path here. - if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || - GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL), + if (CModel == CodeModel::Large || + (GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker())) || + GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); else // Otherwise generate the ADDItocL. 
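The LDtocL-versus-ADDItocL condition above is dense; restated as a standalone predicate (struct and field names are ours, mirroring the GlobalValue queries), an address is loaded from the TOC when its definition may live outside the current module, and computed directly only for locally defined data. The function-type clause is the new "non-local function address" case called out in the updated comment:

    #include <cassert>

    struct GVFacts {                 // stand-ins for the GlobalValue queries
      bool LargeCodeModel = false;
      bool IsFunction = false;
      bool IsDeclaration = false;
      bool IsWeakForLinker = false;
      bool CommonLinkage = false;
      bool AvailableExternally = false;
    };

    // True: emit LDtocL (load the address from a TOC entry).
    // False: emit ADDItocL (compute the address relative to the TOC base).
    static bool needsTOCLoad(const GVFacts &GV) {
      return GV.LargeCodeModel ||
             (GV.IsFunction && (GV.IsDeclaration || GV.IsWeakForLinker)) ||
             GV.IsDeclaration || GV.CommonLinkage || GV.AvailableExternally;
    }

    int main() {
      GVFacts LocalData;                       // defined here: ADDItocL
      assert(!needsTOCLoad(LocalData));
      GVFacts ExternFn;
      ExternFn.IsFunction = ExternFn.IsDeclaration = true;
      assert(needsTOCLoad(ExternFn));          // external function: LDtocL
      return 0;
    }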
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); } @@ -1908,21 +1934,21 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm, bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass); if (isInt<16>(Imm)) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg) .addImm(Imm); else if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg) .addImm(Hi); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg) .addReg(TmpReg).addImm(Lo); } else // Just Hi bits. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg) .addImm(Hi); @@ -1962,7 +1988,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg2; if (Imm) { TmpReg2 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR), TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift); } else TmpReg2 = TmpReg1; @@ -1970,14 +1996,14 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned TmpReg3, Hi, Lo; if ((Hi = (Remainder >> 16) & 0xFFFF)) { TmpReg3 = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8), TmpReg3).addReg(TmpReg2).addImm(Hi); } else TmpReg3 = TmpReg2; if ((Lo = Remainder & 0xFFFF)) { unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8), ResultReg).addReg(TmpReg3).addImm(Lo); return ResultReg; } @@ -1989,6 +2015,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + const ConstantInt *CI = cast<ConstantInt>(C); + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) @@ -2002,7 +2037,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { if (isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? 
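The Lo/Hi split in PPCMaterialize32BitInt relies on LIS sign-extending its 16-bit operand into the upper half and ORI being a plain bitwise OR on the lower half; here is a standalone check of that identity (plain C++, not LLVM code). The 64-bit routine that follows builds on the same trick, shifting the high word left with RLDICR and OR-ing in the remainder with ORIS8/ORI8:

    #include <cassert>
    #include <cstdint>

    // lis: load Hi into bits 16..31, sign-extending (irrelevant once the
    // value is truncated to 32 bits). ori: OR in the low 16 bits.
    static uint32_t lis(int16_t Hi) { return (uint32_t)(int32_t)Hi << 16; }
    static uint32_t ori(uint32_t R, uint16_t Lo) { return R | Lo; }

    int main() {
      for (uint32_t Imm : {0x12345678u, 0xDEADBEEFu, 0x0000FFFFu,
                           0xFFFF0000u}) {
        uint16_t Lo = Imm & 0xFFFF;
        int16_t Hi = (int16_t)(Imm >> 16);
        // Both halves nonzero -> LIS then ORI; otherwise a single LI or
        // LIS suffices, which is exactly the three-way split above.
        assert(ori(lis(Hi), Lo) == Imm);
      }
      return 0;
    }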
PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(CI->getSExtValue()); return ImmReg; } @@ -2051,7 +2086,7 @@ unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { if (SI != FuncInfo.StaticAllocaMap.end()) { unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8), ResultReg).addFrameIndex(SI->second).addImm(0); return ResultReg; } @@ -2130,7 +2165,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned ResultReg = MI->getOperand(0).getReg(); - if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) return false; MI->eraseFromParent(); @@ -2154,6 +2189,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { if (Opc != ISD::Constant) return 0; + // If we're using CR bit registers for i1 values, handle that as a special + // case first. + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { + unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); + return ImmReg; + } + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -2233,6 +2277,6 @@ namespace llvm { if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) return new PPCFastISel(FuncInfo, LibInfo); - return 0; + return nullptr; } } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0ac2ceddcc9e..b2577a9c7cf7 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -15,6 +15,7 @@ #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; +PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0), + Subtarget(STI) {} + +// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. +const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( + unsigned &NumEntries) const { + if (Subtarget.isDarwinABI()) { + NumEntries = 1; + if (Subtarget.isPPC64()) { + static const SpillSlot darwin64Offsets = {PPC::X31, -8}; + return &darwin64Offsets; + } else { + static const SpillSlot darwinOffsets = {PPC::R31, -4}; + return &darwinOffsets; + } + } + + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + NumEntries = 0; + return nullptr; + } + + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + + static const SpillSlot Offsets[] = { + // Floating-point register save area offsets. 
+ {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + {PPC::R31, -4}, + {PPC::R30, -8}, + {PPC::R29, -12}, + {PPC::R28, -16}, + {PPC::R27, -20}, + {PPC::R26, -24}, + {PPC::R25, -28}, + {PPC::R24, -32}, + {PPC::R23, -36}, + {PPC::R22, -40}, + {PPC::R21, -44}, + {PPC::R20, -48}, + {PPC::R19, -52}, + {PPC::R18, -56}, + {PPC::R17, -60}, + {PPC::R16, -64}, + {PPC::R15, -68}, + {PPC::R14, -72}, + + // CR save area offset. We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192}}; + + static const SpillSlot Offsets64[] = { + // Floating-point register save area offsets. + {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + {PPC::X31, -8}, + {PPC::X30, -16}, + {PPC::X29, -24}, + {PPC::X28, -32}, + {PPC::X27, -40}, + {PPC::X26, -48}, + {PPC::X25, -56}, + {PPC::X24, -64}, + {PPC::X23, -72}, + {PPC::X22, -80}, + {PPC::X21, -88}, + {PPC::X20, -96}, + {PPC::X19, -104}, + {PPC::X18, -112}, + {PPC::X17, -120}, + {PPC::X16, -128}, + {PPC::X15, -136}, + {PPC::X14, -144}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192}}; + + if (Subtarget.isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } +} + /// RemoveVRSaveCode - We have found that this function does not need any code /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out @@ -222,7 +384,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. - FrameSize == 0) && + FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. @@ -236,9 +398,10 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Get the maximum call frame size of all the calls. 
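A condensed restatement of the red-zone test in determineFrameLayout may help; this is a sketch with our own field names, and the trailing conditions of the original (link-register and CR spills, among others) are elided here just as they are in the hunk:

    #include <cassert>

    // A frame qualifies for the red zone (the 224 bytes below SP that the
    // ABI leaves untouched across calls) only if it is a leaf frame with
    // no dynamic allocation, so no stack pointer update is ever emitted.
    struct FrameFacts {
      bool IsPPC64, IsSVR4ABI;
      unsigned FrameSize;
      bool HasVarSizedObjects, AdjustsStack;
    };

    static bool fitsInRedZone(const FrameFacts &F) {
      return (F.IsPPC64 || !F.IsSVR4ABI ||  // 32-bit SVR4 has no red zone,
              F.FrameSize == 0) &&          // unless there are no locals.
             F.FrameSize <= 224 &&
             !F.HasVarSizedObjects && !F.AdjustsStack;
    }

    int main() {
      FrameFacts Leaf{true, true, 128, false, false};
      assert(fitsInRedZone(Leaf));
      return 0;
    }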
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - // Maximum call frame needs to be at least big enough for linkage and 8 args. - unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isDarwinABI()); + // Maximum call frame needs to be at least big enough for linkage area. + unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(), + Subtarget.isDarwinABI(), + Subtarget.isELFv2ABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so @@ -281,8 +444,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -299,7 +462,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { const PPCRegisterInfo *RegInfo = static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo()); bool HasBP = RegInfo->hasBasePointer(MF); - unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg; + unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -344,18 +507,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc dl; bool needsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isELFv2ABI = Subtarget.isELFv2ABI(); assert((isDarwinABI || isSVR4ABI) && "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); - // Prepare for frame info. - MCSymbol *FrameLabel = 0; - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. if (!isSVR4ABI) @@ -387,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; unsigned ScratchReg = isPPC64 ? 
PPC::X0 : PPC::R0; @@ -429,7 +591,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + FPOffset = + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -442,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, + isDarwinABI, + isPIC); } } @@ -463,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { "Prologue CR saving supported only in 64-bit mode"); if (!MustSaveCRs.empty()) { // will only occur for PPC64 + // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it could be more + // efficient to use mfocrf to selectively save just those fields. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) @@ -561,36 +729,38 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add the "machine moves" for the instructions we generated above, but in // reverse order. if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); - // Show update of SP. assert(NegFrameSize); - MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); if (HasFP) { unsigned Reg = MRI->getDwarfRegNum(FPReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } if (HasBP) { unsigned Reg = MRI->getDwarfRegNum(BPReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } if (MustSaveLR) { unsigned Reg = MRI->getDwarfRegNum(LRReg, true); - MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset)); + CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } - MCSymbol *ReadyLabel = 0; - // If there is a frame pointer, copy R1 into R31 if (HasFP) { BuildMI(MBB, MBBI, dl, OrInst, FPReg) @@ -598,19 +768,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(SPReg); if (needsFrameMoves) { - ReadyLabel = MMI.getContext().CreateTempSymbol(); - // Mark effective beginning of when frame pointer is ready. 
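Every frame move in this prologue now repeats the same two-step idiom: record the MCCFIInstruction with MachineModuleInfo, then anchor the returned index in the instruction stream with a CFI_INSTRUCTION pseudo, replacing the old PROLOG_LABEL/label-symbol scheme. A small wrapper, ours rather than the patch's, written against the same LLVM 3.5-era APIs this hunk itself calls, makes the pattern explicit:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/MC/MCDwarf.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"

    using namespace llvm;

    // Record a CFI directive, then anchor it at the insertion point with a
    // CFI_INSTRUCTION pseudo carrying the returned index.
    static void emitCFI(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI, DebugLoc dl,
                        const TargetInstrInfo &TII, MachineModuleInfo &MMI,
                        const MCCFIInstruction &Inst) {
      unsigned CFIIndex = MMI.addFrameInst(Inst);
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

With such a helper, the DefCfaOffset case above would reduce to emitCFI(MBB, MBBI, dl, TII, MMI, MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)).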
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - unsigned Reg = MRI->getDwarfRegNum(FPReg, true); - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg)); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } if (needsFrameMoves) { - MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel; - // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { @@ -631,14 +799,22 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // For 64-bit SVR4 when we have spilled CRs, the spill location // is SP+8, not a frame-relative slot. if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { - MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI->getDwarfRegNum(PPC::CR2, true), 8)); + // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for + // the whole CR word. In the ELFv2 ABI, every CR that was + // actually saved gets its own CFI record. + unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); continue; } int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - MMI.addFrameInst(MCCFIInstruction::createOffset( - Label, MRI->getDwarfRegNum(Reg, true), Offset)); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } } @@ -675,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get the ABI. bool isDarwinABI = Subtarget.isDarwinABI(); bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -685,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, bool HasBP = RegInfo->hasBasePointer(MF); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30; + unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; unsigned TempReg = isPPC64 ? 
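The ELFv1/ELFv2 split for CR fields above comes down to one attribution choice (both ABIs use the same SP+8 slot): ELFv1 emits a single record for CR2 that stands in for the whole saved CR word, while ELFv2 records each field that was actually saved. As a standalone sketch with our own enum:

    #include <cassert>

    enum CRField { CR2 = 2, CR3 = 3, CR4 = 4 };

    // Which register a CFI offset record names for a saved nonvolatile CR
    // field; the spill slot itself is SP+8 in either ABI.
    static CRField cfiRegForSavedCR(CRField Saved, bool IsELFv2) {
      return IsELFv2 ? Saved : CR2;
    }

    int main() {
      assert(cfiRegForSavedCR(CR4, /*IsELFv2=*/false) == CR2); // merged
      assert(cfiRegForSavedCR(CR4, /*IsELFv2=*/true) == CR4);  // per-field
      return 0;
    }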
PPC::X12 : PPC::R12; // another scratch reg @@ -712,7 +889,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + FPOffset = + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -725,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BPOffset = FFI->getObjectOffset(BPIndex); } else { BPOffset = - PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI); + PPCFrameLowering::getBasePointerSaveOffset(isPPC64, + isDarwinABI, + isPIC); } } @@ -902,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPSI = FI->getFramePointerSaveIndex(); bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); + bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. @@ -916,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int BPSI = FI->getBasePointerSaveIndex(); if (!BPSI && RegInfo->hasBasePointer(MF)) { - int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI); + int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC); // Allocate the frame index for the base pointer save area. BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); // Save the result. @@ -930,9 +1111,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } - // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. - if (!isPPC64 && !isDarwinABI && + if (!isPPC64 && !isDarwinABI && (MRI.isPhysRegUsed(PPC::CR2) || MRI.isPhysRegUsed(PPC::CR3) || MRI.isPhysRegUsed(PPC::CR4))) { @@ -1106,10 +1287,10 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, unsigned Reg = CSI[i].getReg(); if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) - // Leave Darwin logic as-is. - || (!Subtarget.isSVR4ABI() && - (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)))) { + // Leave Darwin logic as-is. + || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1190,11 +1371,11 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, } } -bool +bool PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. 
@@ -1207,7 +1388,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; - + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // Only Darwin actually uses the VRSAVE register, but it can still appear @@ -1237,21 +1418,21 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CRSpilled = true; FuncInfo->setSpillsCR(); - // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have - // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. - CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. + CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) .addReg(Reg, RegState::ImplicitKill); - MBB.insert(MI, CRMIB); - MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) - .addReg(PPC::R12, - getKillRegState(true)), - CSI[i].getFrameIdx())); + MBB.insert(MI, CRMIB); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); } } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, true, - CSI[i].getFrameIdx(), RC, TRI); + CSI[i].getFrameIdx(), RC, TRI); } } return true; @@ -1260,8 +1441,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, static void restoreCRs(bool isPPC64, bool is31, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = @@ -1275,12 +1456,12 @@ restoreCRs(bool isPPC64, bool is31, else { // 32-bit: FP-relative MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), - PPC::R12), - CSI[CSIIndex].getFrameIdx())); + PPC::R12), + CSI[CSIIndex].getFrameIdx())); RestoreOp = PPC::MTOCRF; MoveReg = PPC::R12; } - + if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); @@ -1335,11 +1516,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -bool +bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. @@ -1387,20 +1568,20 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // When we first encounter a non-CR register after seeing at // least one CR register, restore all spilled CRs together. 
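The CR spill strategy in these routines works because all eight 4-bit CR fields travel through a GPR as one 32-bit word: a single mfcr plus stw spills CR2 through CR4 together, and each mtocrf on the restore path picks its field back out. A standalone model of the field layout (CR0 occupies the most significant nibble):

    #include <cassert>
    #include <cstdint>

    // Extract CR field 0..7 from the packed 32-bit CR word, using the
    // big-endian field numbering the architecture defines.
    static unsigned getCRField(uint32_t CRWord, unsigned Field) {
      return (CRWord >> (28 - 4 * Field)) & 0xF;
    }

    int main() {
      uint32_t CR = 0x12345678;
      assert(getCRField(CR, 0) == 0x1);  // CR0 is the high nibble
      assert(getCRField(CR, 2) == 0x3);  // CR2, the first nonvolatile field
      return 0;
    }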
if ((CR2Spilled || CR3Spilled || CR4Spilled) - && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); - CR2Spilled = CR3Spilled = CR4Spilled = false; + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; } // Default behavior for non-CR saves. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), - RC, TRI); + RC, TRI); assert(I != MBB.begin() && - "loadRegFromStackSlot didn't insert any code!"); + "loadRegFromStackSlot didn't insert any code!"); } // Insert in reverse order. @@ -1409,16 +1590,15 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, else { I = BeforeI; ++I; - } + } } // If we haven't yet spilled the CRs, do so now. if (CR2Spilled || CR3Spilled || CR4Spilled) { - bool is31 = needsFP(*MF); + bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); + MBB, I, CSI, CSIIndex); } return true; } - diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 7aab37e188fe..c0c7d248f8d2 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -14,23 +14,18 @@ #define POWERPC_FRAMEINFO_H #include "PPC.h" -#include "PPCSubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { - class PPCSubtarget; +class PPCSubtarget; class PPCFrameLowering: public TargetFrameLowering { const PPCSubtarget &Subtarget; public: - PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0), - Subtarget(sti) { - } + PPCFrameLowering(const PPCSubtarget &STI); unsigned determineFrameLayout(MachineFunction &MF, bool UpdateMF = true, @@ -38,37 +33,37 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). - bool targetHandlesStackFrameRounding() const { return true; } + bool targetHandlesStackFrameRounding() const override { return true; } /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. @@ -79,6 +74,12 @@ public: return isPPC64 ? 16 : 4; } + /// getTOCSaveOffset - Return the previous frame offset to save the + /// TOC register -- 64-bit SVR4 ABI only. + static unsigned getTOCSaveOffset(bool isELFv2ABI) { + return isELFv2ABI ? 24 : 40; + } + /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { @@ -96,208 +97,30 @@ public: /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) { + static unsigned getBasePointerSaveOffset(bool isPPC64, + bool isDarwinABI, + bool isPIC) { if (isDarwinABI) return isPPC64 ? -16U : -8U; // SVR4 ABI: First slot in the general register save area. - return isPPC64 ? -16U : -8U; + return isPPC64 ? -16U : isPIC ? -12U : -8U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { + static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI, + bool isELFv2ABI) { if (isDarwinABI || isPPC64) - return 6 * (isPPC64 ? 8 : 4); + return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4); // SVR4 ABI: return 8; } - /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI - /// argument area. 
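The ABI constants introduced in this header interact: ELFv2 shrinks the linkage area from six to four doublewords (48 to 32 bytes), which is also why its TOC save slot sits at offset 24 rather than 40. The base-pointer change follows from the getBaseRegister change earlier in the diff: under 32-bit SVR4 PIC, R30 holds the PIC base, so the base pointer falls back to R29 and its fixed save slot at -12. A standalone restatement of the formulas above (our function names):

    #include <cassert>

    static unsigned linkageSize(bool IsPPC64, bool IsDarwin, bool IsELFv2) {
      if (IsDarwin || IsPPC64)
        return (IsELFv2 ? 4 : 6) * (IsPPC64 ? 8 : 4);
      return 8;                      // 32-bit SVR4
    }

    static unsigned tocSaveOffset(bool IsELFv2) { return IsELFv2 ? 24 : 40; }

    static int basePointerSaveOffset(bool IsPPC64, bool IsDarwin,
                                     bool IsPIC) {
      if (IsDarwin)
        return IsPPC64 ? -16 : -8;
      // SVR4: first free slot of the GPR save area; 32-bit PIC keeps R30
      // as the PIC base, pushing the base pointer (R29) down to -12.
      return IsPPC64 ? -16 : (IsPIC ? -12 : -8);
    }

    int main() {
      assert(linkageSize(true, false, true) == 32);   // ELFv2
      assert(linkageSize(true, false, false) == 48);  // ELFv1
      assert(tocSaveOffset(true) == 24 && tocSaveOffset(false) == 40);
      assert(basePointerSaveOffset(false, false, true) == -12);
      return 0;
    }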
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { - // For the Darwin ABI / 64-bit SVR4 ABI: - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. - if (isDarwinABI || isPPC64) - return 8 * (isPPC64 ? 8 : 4); - - // 32-bit SVR4 ABI: - // There is no default stack allocated for the 8 first GPR arguments. - return 0; - } - - /// getMinCallFrameSize - Return the minimum size a call frame can be using - /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { - // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(isPPC64, isDarwinABI) + - getMinCallArgumentsSize(isPPC64, isDarwinABI); - } - - // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const { - if (Subtarget.isDarwinABI()) { - NumEntries = 1; - if (Subtarget.isPPC64()) { - static const SpillSlot darwin64Offsets = {PPC::X31, -8}; - return &darwin64Offsets; - } else { - static const SpillSlot darwinOffsets = {PPC::R31, -4}; - return &darwinOffsets; - } - } - - // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) { - NumEntries = 0; - return 0; - } - - // Note that the offsets here overlap, but this is fixed up in - // processFunctionBeforeFrameFinalized. - - static const SpillSlot Offsets[] = { - // Floating-point register save area offsets. - {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - {PPC::R31, -4}, - {PPC::R30, -8}, - {PPC::R29, -12}, - {PPC::R28, -16}, - {PPC::R27, -20}, - {PPC::R26, -24}, - {PPC::R25, -28}, - {PPC::R24, -32}, - {PPC::R23, -36}, - {PPC::R22, -40}, - {PPC::R21, -44}, - {PPC::R20, -48}, - {PPC::R19, -52}, - {PPC::R18, -56}, - {PPC::R17, -60}, - {PPC::R16, -64}, - {PPC::R15, -68}, - {PPC::R14, -72}, - - // CR save area offset. We map each of the nonvolatile CR fields - // to the slot for CR2, which is the first of the nonvolatile CR - // fields to be assigned, so that we only allocate one save slot. - // See PPCRegisterInfo::hasReservedSpillSlot() for more information. - {PPC::CR2, -4}, - - // VRSAVE save area offset. - {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - static const SpillSlot Offsets64[] = { - // Floating-point register save area offsets. 
- {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - {PPC::X31, -8}, - {PPC::X30, -16}, - {PPC::X29, -24}, - {PPC::X28, -32}, - {PPC::X27, -40}, - {PPC::X26, -48}, - {PPC::X25, -56}, - {PPC::X24, -64}, - {PPC::X23, -72}, - {PPC::X22, -80}, - {PPC::X21, -88}, - {PPC::X20, -96}, - {PPC::X19, -104}, - {PPC::X18, -112}, - {PPC::X17, -120}, - {PPC::X16, -128}, - {PPC::X15, -136}, - {PPC::X14, -144}, - - // VRSAVE save area offset. - {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - if (Subtarget.isPPC64()) { - NumEntries = array_lengthof(Offsets64); - - return Offsets64; - } else { - NumEntries = array_lengthof(Offsets); - - return Offsets; - } - } + getCalleeSavedSpillSlots(unsigned &NumEntries) const override; }; - } // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 0df50e17dd9d..d9b242cad265 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -11,38 +11,226 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "PPCHazardRecognizers.h" #include "PPC.h" #include "PPCInstrInfo.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// PowerPC Scoreboard Hazard Recognizer -void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { +#define DEBUG_TYPE "pre-RA-sched" + +bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { + // FIXME: Move this. + if (isBCTRAfterSet(SU)) + return true; + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); if (!MCID) - // This is a PPC pseudo-instruction. - return; + return false; + + if (!MCID->mayLoad()) + return false; + + // SU is a load; for any predecessors in this dispatch group, that are stores, + // and with which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || !PredMCID->mayStore()) + continue; + + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + return false; + + if (!MCID->isBranch()) + return false; + + // SU is a branch; for any predecessors in this dispatch group, with which we + // have a data dependence and set the counter register, return true. 
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) + continue; + + if (SU->Preds[i].isCtrl()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } - ScoreboardHazardRecognizer::EmitInstruction(SU); + return false; +} + +// FIXME: Remove this when we don't need this: +namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } + +// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. + +bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, + unsigned &NSlots) { + // FIXME: Indirectly, this information is contained in the itinerary, and + // we should derive it from there instead of separately specifying it + // here. + unsigned IIC = MCID->getSchedClass(); + switch (IIC) { + default: + NSlots = 1; + break; + case PPC::Sched::IIC_IntDivW: + case PPC::Sched::IIC_IntDivD: + case PPC::Sched::IIC_LdStLoadUpd: + case PPC::Sched::IIC_LdStLDU: + case PPC::Sched::IIC_LdStLFDU: + case PPC::Sched::IIC_LdStLFDUX: + case PPC::Sched::IIC_LdStLHA: + case PPC::Sched::IIC_LdStLHAU: + case PPC::Sched::IIC_LdStLWA: + case PPC::Sched::IIC_LdStSTDU: + case PPC::Sched::IIC_LdStSTFDU: + NSlots = 2; + break; + case PPC::Sched::IIC_LdStLoadUpdX: + case PPC::Sched::IIC_LdStLDUX: + case PPC::Sched::IIC_LdStLHAUX: + case PPC::Sched::IIC_LdStLWARX: + case PPC::Sched::IIC_LdStLDARX: + case PPC::Sched::IIC_LdStSTDUX: + case PPC::Sched::IIC_LdStSTDCX: + case PPC::Sched::IIC_LdStSTWCX: + case PPC::Sched::IIC_BrMCRX: // mtcr + // FIXME: Add sync/isync (here and in the itinerary). + NSlots = 4; + break; + } + + // FIXME: record-form instructions need a different itinerary class. + if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) + NSlots = 2; + + switch (IIC) { + default: + // All multi-slot instructions must come first. + return NSlots > 1; + case PPC::Sched::IIC_BrCR: // cr logicals + case PPC::Sched::IIC_SprMFCR: + case PPC::Sched::IIC_SprMFCRF: + case PPC::Sched::IIC_SprMTSPR: + return true; + } } ScheduleHazardRecognizer::HazardType -PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { +PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (Stalls == 0 && isLoadAfterStore(SU)) + return NoopHazard; + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); } -void PPCScoreboardHazardRecognizer::AdvanceCycle() { - ScoreboardHazardRecognizer::AdvanceCycle(); +bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + unsigned NSlots; + if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) + return true; + + return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); +} + +unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + // We only need to fill out a maximum of 5 slots here: The 6th slot could + // only be a second branch, and otherwise the next instruction will start a + // new group. + if (isLoadAfterStore(SU) && CurSlots < 6) { + unsigned Directive = + DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + // If we're using a special group-terminating nop, then we need only one. 
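The NSlots values assigned in mustComeFirst encode how many dispatch slots an instruction occupies: cracked instructions (loads and stores with update, record-form operations) take two, microcoded ones (mtcr, stwcx., the divides) take four, and any multi-slot instruction must lead its group. A few single-slot classes, such as CR logicals and mfcr, must lead as well; this simplified standalone model (our own enum) deliberately ignores those exceptions:

    enum class Kind { Simple, Cracked, Microcoded };

    static unsigned slotsFor(Kind K) {
      switch (K) {
      case Kind::Simple:     return 1;
      case Kind::Cracked:    return 2;  // e.g. load/store with update
      case Kind::Microcoded: return 4;  // e.g. mtcr, stwcx.
      }
      return 1;
    }

    // Multi-slot instructions must begin a dispatch group.
    static bool mustLeadGroup(Kind K) { return slotsFor(K) > 1; }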
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + Directive == PPC::DIR_PWR8 ) + return 1; + + return 5 - CurSlots; + } + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); } -void PPCScoreboardHazardRecognizer::Reset() { - ScoreboardHazardRecognizer::Reset(); +void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID) { + if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << + SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + + unsigned NSlots; + bool MustBeFirst = mustComeFirst(MCID, NSlots); + + // If this instruction must come first, but does not, then it starts a + // new group. + if (MustBeFirst && CurSlots) { + CurSlots = CurBranches = 0; + CurGroup.clear(); + } + + CurSlots += NSlots; + CurGroup.push_back(SU); + + if (MCID->isBranch()) + ++CurBranches; + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { + return ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { + llvm_unreachable("Bottom-up scheduling not supported"); +} + +void PPCDispatchGroupSBHazardRecognizer::Reset() { + CurGroup.clear(); + CurSlots = CurBranches = 0; + return ScoreboardHazardRecognizer::Reset(); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { + unsigned Directive = + DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + // If the group has now filled all of its slots, or if we're using a special + // group-terminating nop, the group is complete. + if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + Directive == PPC::DIR_PWR8 || CurSlots == 6) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + CurGroup.push_back(nullptr); + ++CurSlots; + } } //===----------------------------------------------------------------------===// @@ -71,8 +259,8 @@ void PPCScoreboardHazardRecognizer::Reset() { // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". // -PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM) - : TM(TM) { +PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) + : DAG(DAG) { EndDispatchGroup(); } @@ -91,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode); + const MCInstrDesc &MCID = DAG.TII->get(Opcode); isLoad = MCID.mayLoad(); isStore = MCID.mayStore(); diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 84b8e6de4579..23f76c16d138 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,19 +21,30 @@ namespace llvm { -/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based -/// hazard recognizer for generic PPC processors. -class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { +/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for PPC ooo processors with dispatch-group hazards. 
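EmitInstruction and EmitNoop above maintain only a few counters; the core bookkeeping, stripped of the LLVM types, looks like this (a standalone model of the recognizer's group tracking, not the recognizer itself):

    // POWER7-style dispatch groups: up to 5 slots, with a 6th reserved for
    // a second branch; some instructions take several slots, some must lead.
    struct DispatchGroupModel {
      unsigned CurSlots = 0, CurBranches = 0;

      void startNewGroup() { CurSlots = CurBranches = 0; }

      void emit(unsigned NSlots, bool IsBranch, bool MustBeFirst) {
        if (CurSlots == 5 || (IsBranch && CurBranches == 1)) {
          startNewGroup();  // this instruction terminates the group; the
          return;           // next one starts fresh, as in the code above
        }
        if (MustBeFirst && CurSlots)
          startNewGroup();  // instruction must lead: begin a new group
        CurSlots += NSlots;
        if (IsBranch)
          ++CurBranches;
      }
    };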
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; + SmallVector<SUnit *, 7> CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); public: - PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : - ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} - - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void Reset(); + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_), + CurSlots(0), CurBranches(0) {} + + HazardType getHazardType(SUnit *SU, int Stalls) override; + bool ShouldPreferAnother(SUnit* SU) override; + unsigned PreEmitNoops(SUnit *SU) override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void RecedeCycle() override; + void Reset() override; + void EmitNoop() override; }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -43,7 +54,7 @@ public: /// setting the CTR register then branching through it within a dispatch group), /// or storing then loading from the same address within a dispatch group. class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { - const TargetMachine &TM; + const ScheduleDAG &DAG; unsigned NumIssued; // Number of insts issued, including advanced cycles. @@ -64,11 +75,11 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { unsigned NumStores; public: - PPCHazardRecognizer970(const TargetMachine &TM); - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void Reset(); + PPCHazardRecognizer970(const ScheduleDAG &DAG); + virtual HazardType getHazardType(SUnit *SU, int Stalls) override; + virtual void EmitInstruction(SUnit *SU) override; + virtual void AdvanceCycle() override; + virtual void Reset() override; private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index d25762a5bbca..536c2824fb7a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-codegen" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -34,6 +35,12 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "ppc-codegen" + +// FIXME: Remove this once the bug has been fixed! 
+cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", +cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); + namespace llvm { void initializePPCDAGToDAGISelPass(PassRegistry&); } @@ -45,29 +52,31 @@ namespace { /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; - const PPCTargetLowering &PPCLowering; - const PPCSubtarget &PPCSubTarget; + const PPCTargetLowering *PPCLowering; + const PPCSubtarget *PPCSubTarget; unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), - PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) { + PPCLowering(TM.getTargetLowering()), + PPCSubTarget(TM.getSubtargetImpl()) { initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; + PPCLowering = TM.getTargetLowering(); + PPCSubTarget = TM.getSubtargetImpl(); SelectionDAGISel::runOnMachineFunction(MF); - if (!PPCSubTarget.isSVR4ABI()) + if (!PPCSubTarget->isSVR4ABI()) InsertVRSaveCode(MF); return true; } - virtual void PostprocessISelDAG(); + void PostprocessISelDAG() override; /// getI32Imm - Return a target constant with the specified value, of type /// i32. @@ -83,7 +92,7 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); + return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s @@ -104,7 +113,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectBitfieldInsert(SDNode *N); @@ -116,7 +125,7 @@ namespace { /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -136,20 +145,20 @@ namespace { /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); } // Select an address into a single register. 
@@ -163,16 +172,16 @@ namespace {
     /// a register. The case of adding a (possibly relocatable) constant to a
     /// register can be improved, but it is wrong to substitute Reg+Reg for
     /// Reg in an asm, because the load or store opcode would have to change.
-    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                              char ConstraintCode,
-                                              std::vector<SDValue> &OutOps) {
+    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                      char ConstraintCode,
+                                      std::vector<SDValue> &OutOps) override {
       OutOps.push_back(Op);
       return false;
     }

     void InsertVRSaveCode(MachineFunction &MF);

-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
       return "PowerPC DAG->DAG Pattern Instruction Selection";
     }

@@ -181,6 +190,12 @@ namespace {

   private:
     SDNode *SelectSETCC(SDNode *N);
+
+    void PeepholePPC64();
+    void PeepholeCROps();
+
+    bool AllUsersSelectZero(SDNode *N);
+    void SwapAllSelectUsers(SDNode *N);
   };
 }

@@ -260,10 +275,22 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
     DebugLoc dl;

-    if (PPCLowering.getPointerTy() == MVT::i32) {
-      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+    if (PPCLowering->getPointerTy() == MVT::i32) {
+      if (PPCSubTarget->isTargetELF())
+        GlobalBaseReg = PPC::R30;
+      else
+        GlobalBaseReg =
+          RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+      if (PPCSubTarget->isTargetELF()) {
+        unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+        BuildMI(FirstMBB, MBBI, dl,
+                TII.get(PPC::GetGBRO), TempReg).addReg(GlobalBaseReg);
+        BuildMI(FirstMBB, MBBI, dl,
+                TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg).addReg(TempReg);
+        MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+      }
     } else {
       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
@@ -271,7 +298,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
     }
   }
   return CurDAG->getRegister(GlobalBaseReg,
-                             PPCLowering.getPointerTy()).getNode();
+                             PPCLowering->getPointerTy()).getNode();
 }

 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
@@ -403,8 +430,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
   SDLoc dl(N);

   APInt LKZ, LKO, RKZ, RKO;
-  CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
-  CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
+  CurDAG->computeKnownBits(Op0, LKZ, LKO);
+  CurDAG->computeKnownBits(Op1, RKZ, RKO);

   unsigned TargetMask = LKZ.getZExtValue();
   unsigned InsertMask = RKZ.getZExtValue();
@@ -447,11 +474,18 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
       SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
     }
     if (Op1Opc == ISD::AND) {
+      // The AND mask might not be a constant, and we need to make sure that
+      // if we're going to fold the masking with the insert, all bits not
+      // known to be zero in the mask are known to be one.
+      APInt MKZ, MKO;
+      CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
+      bool CanFoldMask = InsertMask == MKO.getZExtValue();
+
       unsigned SHOpc = Op1.getOperand(0).getOpcode();
-      if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+      if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
          isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
-        // Note that Value must be in range here (less than 32) because
-        // otherwise there would not be any bits set in InsertMask.
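The CanFoldMask test added above deserves a gloss: the AND mask need not be a constant, and folding it into the rlwimi insert is sound only when every bit that is not known zero is known one, that is, when the known-ones set is exactly the mask. A standalone sketch of the check (our stand-in type for the APInt pair):

    #include <cassert>
    #include <cstdint>

    struct KnownBits32 {   // stand-in for the (MKZ, MKO) APInt pair above
      uint32_t Zero, One;  // bit known 0 / known 1 (never both)
    };

    static bool canFoldMask(uint32_t InsertMask, KnownBits32 MaskBits) {
      return InsertMask == MaskBits.One;
    }

    int main() {
      // Mask fully known: ones 0x0000FFFF, zeros 0xFFFF0000 -> foldable.
      assert(canFoldMask(0x0000FFFF, {0xFFFF0000u, 0x0000FFFFu}));
      // Low byte unknown: folding would be unsound, so the test rejects it.
      assert(!canFoldMask(0x0000FFFF, {0xFFFF0000u, 0x0000FF00u}));
      return 0;
    }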
+ // Note that Value must be in range here (less than 32) because + // otherwise there would not be any bits set in InsertMask. Op1 = Op1.getOperand(0).getOperand(0); SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; } @@ -463,7 +497,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } - return 0; + return nullptr; } /// SelectCC - Select a comparison of the specified values with the specified @@ -561,7 +595,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, Opc = PPC::FCMPUS; } else { assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); - Opc = PPC::FCMPUD; + Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -629,85 +663,108 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, // only support the altivec types (v16i8, v8i16, v4i32, and v4f32). -static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { - switch (CC) { - case ISD::SETEQ: - case ISD::SETUEQ: - case ISD::SETNE: - case ISD::SETUNE: - if (VecVT == MVT::v16i8) - return PPC::VCMPEQUB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPEQUH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPEQUW; - // v4f32 != v4f32 could be translate to unordered not equal - else if (VecVT == MVT::v4f32) - return PPC::VCMPEQFP; - break; - case ISD::SETLT: - case ISD::SETGT: - case ISD::SETLE: - case ISD::SETGE: - if (VecVT == MVT::v16i8) - return PPC::VCMPGTSB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPGTSH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPGTSW; - else if (VecVT == MVT::v4f32) - return PPC::VCMPGTFP; - break; - case ISD::SETULT: - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULE: - if (VecVT == MVT::v16i8) - return PPC::VCMPGTUB; - else if (VecVT == MVT::v8i16) - return PPC::VCMPGTUH; - else if (VecVT == MVT::v4i32) - return PPC::VCMPGTUW; - break; - case ISD::SETOEQ: - if (VecVT == MVT::v4f32) - return PPC::VCMPEQFP; - break; - case ISD::SETOLT: - case ISD::SETOGT: - case ISD::SETOLE: - if (VecVT == MVT::v4f32) - return PPC::VCMPGTFP; - break; - case ISD::SETOGE: - if (VecVT == MVT::v4f32) - return PPC::VCMPGEFP; - break; - default: - break; - } - llvm_unreachable("Invalid integer vector compare condition"); -} +static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, + bool HasVSX, bool &Swap, bool &Negate) { + Swap = false; + Negate = false; -// getVCmpEQInst: return the equal compare instruction for the specified vector -// type. Since this is for altivec specific code, only support the altivec -// types (v16i8, v8i16, v4i32, and v4f32). -static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { - switch (VecVT) { - case MVT::v16i8: - return PPC::VCMPEQUB; - case MVT::v8i16: - return PPC::VCMPEQUH; - case MVT::v4i32: - return PPC::VCMPEQUW; - case MVT::v4f32: - return PPC::VCMPEQFP; - default: - llvm_unreachable("Invalid integer vector compare condition"); + if (VecVT.isFloatingPoint()) { + /* Handle some cases by swapping input operands. 
*/ + switch (CC) { + case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; + case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; + case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; + case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; + case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; + case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; + default: break; + } + /* Handle some cases by negating the result. */ + switch (CC) { + case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; + case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; + case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; + case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; + default: break; + } + /* We have instructions implementing the remaining cases. */ + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPEQDP; + break; + case ISD::SETGT: + case ISD::SETOGT: + if (VecVT == MVT::v4f32) + return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGTDP; + break; + case ISD::SETGE: + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; + else if (VecVT == MVT::v2f64) + return PPC::XVCMPGEDP; + break; + default: + break; + } + llvm_unreachable("Invalid floating-point vector compare condition"); + } else { + /* Handle some cases by swapping input operands. */ + switch (CC) { + case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; + case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; + case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; + case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; + default: break; + } + /* Handle some cases by negating the result. */ + switch (CC) { + case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; + case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; + case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; + case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; + default: break; + } + /* We have instructions implementing the remaining cases. */ + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + break; + case ISD::SETGT: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + break; + case ISD::SETUGT: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); } } - SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; @@ -715,7 +772,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - if (isInt32Immediate(N->getOperand(1), Imm)) { + if (!PPCSubTarget->useCRBits() && + isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
// setcc op, 0 @@ -726,7 +784,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETEQ: { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETNE: { if (isPPC64) break; @@ -738,14 +796,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue T = SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } } } else if (Imm == ~0U) { // setcc op, -1 @@ -775,7 +833,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, Op), 0); SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; @@ -795,56 +853,25 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { EVT VecVT = LHS.getValueType(); - MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; - unsigned int VCmpInst = getVCmpInst(VT, CC); - - switch (CC) { - case ISD::SETEQ: - case ISD::SETOEQ: - case ISD::SETUEQ: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - case ISD::SETNE: - case ISD::SETONE: - case ISD::SETUNE: { - SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); - } - case ISD::SETLT: - case ISD::SETOLT: - case ISD::SETULT: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); - case ISD::SETGT: - case ISD::SETOGT: - case ISD::SETUGT: - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - case ISD::SETGE: - case ISD::SETOGE: - case ISD::SETUGE: { - // Small optimization: Altivec provides a 'Vector Compare Greater Than - // or Equal To' instruction (vcmpgefp), so in this case there is no - // need for extra logic for the equal compare. 
- if (VecVT.getSimpleVT().isFloatingPoint()) { - return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); - } else { - SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT); - SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); - } - } - case ISD::SETLE: - case ISD::SETOLE: - case ISD::SETULE: { - SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT); - SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); - } - default: - llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + bool Swap, Negate; + unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, + PPCSubTarget->hasVSX(), Swap, Negate); + if (Swap) + std::swap(LHS, RHS); + + if (Negate) { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : + PPC::VNOR, + VecVT, VCmp, VCmp); } + + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); } + if (PPCSubTarget->useCRBits()) + return nullptr; + bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); @@ -853,7 +880,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); - SDValue InFlag(0, 0); // Null incoming flag value. + SDValue InFlag(nullptr, 0); // Null incoming flag value. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); @@ -863,7 +890,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; if (!Inv) - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); // Get the specified bit. SDValue Tmp = @@ -878,7 +905,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. 
} switch (N->getOpcode()) { @@ -959,8 +986,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::SETCC: - return SelectSETCC(N); + case ISD::SETCC: { + SDNode *SN = SelectSETCC(N); + if (SN) + return SN; + break; + } case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); @@ -1056,7 +1087,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } else { unsigned Opcode; @@ -1091,7 +1122,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } } @@ -1106,7 +1137,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { SDValue Val = N->getOperand(0).getOperand(0); SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm @@ -1115,20 +1146,34 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getOpcode() != ISD::ROTL) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is a 64-bit zero-extension mask, emit rldicl. if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && isMask_64(Imm64)) { SDValue Val = N->getOperand(0); MB = 64 - CountTrailingOnes_64(Imm64); - SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; - return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + SH = 0; + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. + if (Val.getOpcode() == ISD::SRL && + isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { + assert(Imm < 64 && "Illegal shift amount"); + Val = Val.getOperand(0); + SH = 64 - Imm; + } + + SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) }; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return NULL; + return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert @@ -1161,7 +1206,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. 
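Aside (not part of the commit): the srl-into-rldicl fold added in the hunk above relies on the identity rldicl(srl(x, n), 0, mb) == rldicl(x, 64-n, mb) whenever n <= mb, because the mask implied by the shift is a superset of the zero-extension mask. A quick host-side sketch that checks the identity by modeling rldicl directly; this is illustrative code, not part of the LLVM sources:

#include <cassert>
#include <cstdint>

static uint64_t rotl64(uint64_t X, unsigned SH) {
  return SH == 0 ? X : (X << SH) | (X >> (64 - SH));
}

// rldicl X, SH, MB: rotate the doubleword left by SH, then clear the MB
// high-order bits, i.e. keep only the low 64-MB bits.
static uint64_t rldicl(uint64_t X, unsigned SH, unsigned MB) {
  return rotl64(X, SH) & (~0ULL >> MB);
}

int main() {
  const uint64_t X = 0xDEADBEEFCAFEF00DULL;
  for (unsigned N = 1; N < 64; ++N)        // srl amount
    for (unsigned MB = N; MB < 64; ++MB)   // the fold requires N <= MB
      assert(rldicl(X >> N, 0, MB) == rldicl(X, 64 - N, MB));
  return 0;
}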
@@ -1173,17 +1218,45 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. break; } + // FIXME: Remove this once the ANDI glue bug is fixed: + case PPCISD::ANDIo_1_EQ_BIT: + case PPCISD::ANDIo_1_GT_BIT: { + if (!ANDIGlueBug) + break; + + EVT InVT = N->getOperand(0).getValueType(); + assert((InVT == MVT::i64 || InVT == MVT::i32) && + "Invalid input type for ANDIo_1_EQ_BIT"); + + unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; + SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, + N->getOperand(0), + CurDAG->getTargetConstant(1, InVT)), 0); + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue SRIdxVal = + CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? + PPC::sub_eq : PPC::sub_gt, MVT::i32); + + return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, + CR0Reg, SRIdxVal, + SDValue(AndI.getNode(), 1) /* glue */); + } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); + // If this is a select of i1 operands, we'll pattern match it. + if (PPCSubTarget->useCRBits() && + N->getOperand(0).getValueType() == MVT::i1) + break; + // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) @@ -1202,6 +1275,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + + if (N->getValueType(0) == MVT::i1) { + // An i1 select is: (c & t) | (!c & f). + bool Inv; + unsigned Idx = getCRIdxForSetCC(CC, Inv); + + unsigned SRI; + switch (Idx) { + default: llvm_unreachable("Invalid CC index"); + case 0: SRI = PPC::sub_lt; break; + case 1: SRI = PPC::sub_gt; break; + case 2: SRI = PPC::sub_eq; break; + case 3: SRI = PPC::sub_un; break; + } + + SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); + + SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, + CCBit, CCBit), 0); + SDValue C = Inv ? NotCCBit : CCBit, + NotC = Inv ? CCBit : NotCCBit; + + SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + C, N->getOperand(2)), 0); + SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, + NotC, N->getOperand(3)), 0); + + return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); + } + unsigned BROpc = getPredicateForSetCC(CC); unsigned SelectCCOp; @@ -1218,16 +1321,60 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc) }; - return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4); + return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); } + case ISD::VSELECT: + if (PPCSubTarget->hasVSX()) { + SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; + return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); + } + + break; + case ISD::VECTOR_SHUFFLE: + if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || + N->getValueType(0) == MVT::v2i64)) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 
0 : 1), + Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); + unsigned DM[2]; + + for (int i = 0; i < 2; ++i) + if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) + DM[i] = 0; + else + DM[i] = 1; + + SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32); + + if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && + Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && + isa<LoadSDNode>(Op1.getOperand(0))) { + LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); + SDValue Base, Offset; + + if (LD->isUnindexed() && + SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { + SDValue Chain = LD->getChain(); + SDValue Ops[] = { Base, Offset, Chain }; + return CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); + } + } + + SDValue Ops[] = { Op1, Op2, DMV }; + return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); + } + + break; case PPCISD::BDNZ: case PPCISD::BDZ: { - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), - MVT::Other, Ops, 2); + MVT::Other, Ops); } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. @@ -1240,14 +1387,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5); + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); + unsigned PCC = getPredicateForSetCC(CC); + + if (N->getOperand(2).getValueType() == MVT::i1) { + unsigned Opc; + bool Swap; + switch (PCC) { + default: llvm_unreachable("Unexpected Boolean-operand predicate"); + case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; + case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; + case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; + case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; + case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; + case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; + } + + SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, + N->getOperand(Swap ? 3 : 2), + N->getOperand(Swap ? 2 : 3)), 0); + return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, + BitComp, N->getOperand(4), N->getOperand(0)); + } + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); - SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, + SDValue Ops[] = { getI32Imm(PCC), CondCode, N->getOperand(4), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BRIND: { // FIXME: Should custom lower this. @@ -1260,7 +1429,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { - assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { + SDValue GA = N->getOperand(0); + return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + N->getOperand(1)); + } + assert (PPCSubTarget->isPPC64() && + "Only supported for 64-bit ABI and 32-bit SVR4"); // For medium and large code model, we generate two instructions as // described below. 
Otherwise we allow SelectCodeCommon to handle this, @@ -1269,10 +1444,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; - // The first source operand is a TargetGlobalAddress or a - // TargetJumpTable. If it is an externally defined symbol, a symbol - // with common linkage, a function address, or a jump table address, - // or if we are generating code for large code model, we generate: + // The first source operand is a TargetGlobalAddress or a TargetJumpTable. + // If it is an externally defined symbol, a symbol with common linkage, + // a non-local function address, or a jump table address, or if we are + // generating code for large code model, we generate: // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) @@ -1287,18 +1462,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); - const GlobalValue *RealGValue = GAlias ? - GAlias->resolveAliasedGlobal(false) : GValue; - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - assert((GVar || isa<Function>(RealGValue)) && - "Unexpected global value subclass!"); - - // An external variable is one without an initializer. For these, - // for variables with common linkage, and for Functions, generate - // the LDtocL form. - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + if ((GValue->getType()->getElementType()->isFunctionTy() && + (GValue->isDeclaration() || GValue->isWeakForLinker())) || + GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } @@ -1382,7 +1549,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } -/// PostProcessISelDAG - Perform some late peephole optimizations +/// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { @@ -1390,8 +1557,480 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { if (TM.getOptLevel() == CodeGenOpt::None) return; + PeepholePPC64(); + PeepholeCROps(); +} + +// Check if all users of this node will become isel where the second operand +// is the constant zero. If this is so, and if we can negate the condition, +// then we can flip the true and false operands. This will allow the zero to +// be folded with the isel so that we don't need to materialize a register +// containing zero. +bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { + // If we're not using isel, then this does not matter. 
+ if (!PPCSubTarget->hasISEL()) + return false; + + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (!User->isMachineOpcode()) + return false; + if (User->getMachineOpcode() != PPC::SELECT_I4 && + User->getMachineOpcode() != PPC::SELECT_I8) + return false; + + SDNode *Op2 = User->getOperand(2).getNode(); + if (!Op2->isMachineOpcode()) + return false; + + if (Op2->getMachineOpcode() != PPC::LI && + Op2->getMachineOpcode() != PPC::LI8) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); + if (!C) + return false; + + if (!C->isNullValue()) + return false; + } + + return true; +} + +void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { + SmallVector<SDNode *, 4> ToReplace; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + assert((User->getMachineOpcode() == PPC::SELECT_I4 || + User->getMachineOpcode() == PPC::SELECT_I8) && + "Must have all select users"); + ToReplace.push_back(User); + } + + for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), + UE = ToReplace.end(); UI != UE; ++UI) { + SDNode *User = *UI; + SDNode *ResNode = + CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), + User->getValueType(0), User->getOperand(0), + User->getOperand(2), + User->getOperand(1)); + + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(User->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + ReplaceUses(User, ResNode); + } +} + +void PPCDAGToDAGISel::PeepholeCROps() { + bool IsModified; + do { + IsModified = false; + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + if (!MachineNode || MachineNode->use_empty()) + continue; + SDNode *ResNode = MachineNode; + + bool Op1Set = false, Op1Unset = false, + Op1Not = false, + Op2Set = false, Op2Unset = false, + Op2Not = false; + + unsigned Opcode = MachineNode->getMachineOpcode(); + switch (Opcode) { + default: break; + case PPC::CRAND: + case PPC::CRNAND: + case PPC::CROR: + case PPC::CRXOR: + case PPC::CRNOR: + case PPC::CREQV: + case PPC::CRANDC: + case PPC::CRORC: { + SDValue Op = MachineNode->getOperand(1); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op2Set = true; + else if (Op.getMachineOpcode() == PPC::CRUNSET) + Op2Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op2Not = true; + } + } // fallthrough + case PPC::BC: + case PPC::BCn: + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: { + SDValue Op = MachineNode->getOperand(0); + if (Op.isMachineOpcode()) { + if (Op.getMachineOpcode() == PPC::CRSET) + Op1Set = true; + else if (Op.getMachineOpcode() == PPC::CRUNSET) + Op1Unset = true; + else if (Op.getMachineOpcode() == PPC::CRNOR && + Op.getOperand(0) == Op.getOperand(1)) + Op1Not = true; + } + } + break; + } + + bool SelectSwap = false; + switch (Opcode) { + default: break; + case PPC::CRAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x & x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set) + // 1 & y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // x & 1 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset || Op2Unset) + // x & 0 = 0 & y = 0 + 
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // ~x & y = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). + getOperand(0)); + else if (Op2Not) + // x & ~y = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRNAND: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // nand(x, x) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Set) + // nand(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // nand(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset || Op2Unset) + // nand(x, 0) = nand(0, y) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Not) + // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nand(x, ~y) = ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). + getOperand(0), + MachineNode->getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CROR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // x | x = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Set || Op2Set) + // x | 1 = 1 | y = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // 0 | y = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // x | 0 = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // ~x | y = orc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0). + getOperand(0)); + else if (Op2Not) + // x | ~y = orc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRXOR: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // xor(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // xor(1, y) -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Set) + // xor(x, 1) -> nor(x, x) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Unset) + // xor(0, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Unset) + // xor(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // xor(~x, y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // xor(x, ~y) = eqv(x, y) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRNOR: + if (Op1Set || Op2Set) + // nor(1, y) -> 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Unset) + // nor(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // nor(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // nor(~x, y) = andc(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // nor(x, ~y) = andc(y, x) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1). 
+ getOperand(0), + MachineNode->getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CREQV: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // eqv(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // eqv(1, y) = y + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op2Set) + // eqv(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // eqv(0, y) = ~y -> nor(y, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op2Unset) + // eqv(x, 0) = ~x + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(0)); + else if (Op1Not) + // eqv(~x, y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // eqv(x, ~y) = xor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1)), + SelectSwap = true; + break; + case PPC::CRANDC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // andc(x, x) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set) + // andc(1, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Unset || Op2Set) + // andc(0, y) = andc(x, 1) = 0 + ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Unset) + // andc(x, 0) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Not) + // andc(~x, y) = ~(x | y) = nor(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // andc(x, ~y) = x & y + ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). 
+ getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0)), + SelectSwap = true; + break; + case PPC::CRORC: + if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) + // orc(x, x) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op1Set || Op2Unset) + // orc(1, y) = orc(x, 0) = 1 + ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), + MVT::i1); + else if (Op2Set) + // orc(x, 1) = x + ResNode = MachineNode->getOperand(0).getNode(); + else if (Op1Unset) + // orc(0, y) = ~y + ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(1)); + else if (Op1Not) + // orc(~x, y) = ~(x & y) = nand(x, y) + ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1)); + else if (Op2Not) + // orc(x, ~y) = x | y + ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(0), + MachineNode->getOperand(1). + getOperand(0)); + else if (AllUsersSelectZero(MachineNode)) + ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), + MVT::i1, MachineNode->getOperand(1), + MachineNode->getOperand(0)), + SelectSwap = true; + break; + case PPC::SELECT_I4: + case PPC::SELECT_I8: + case PPC::SELECT_F4: + case PPC::SELECT_F8: + case PPC::SELECT_VRRC: + if (Op1Set) + ResNode = MachineNode->getOperand(1).getNode(); + else if (Op1Unset) + ResNode = MachineNode->getOperand(2).getNode(); + else if (Op1Not) + ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), + SDLoc(MachineNode), + MachineNode->getValueType(0), + MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(2), + MachineNode->getOperand(1)); + break; + case PPC::BC: + case PPC::BCn: + if (Op1Not) + ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : + PPC::BC, + SDLoc(MachineNode), + MVT::Other, + MachineNode->getOperand(0). + getOperand(0), + MachineNode->getOperand(1), + MachineNode->getOperand(2)); + // FIXME: Handle Op1Set, Op1Unset here too. + break; + } + + // If we're inverting this node because it is used only by selects that + // we'd like to swap, then swap the selects before the node replacement. + if (SelectSwap) + SwapAllSelectUsers(MachineNode); + + if (ResNode != MachineNode) { + DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); + DEBUG(MachineNode->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(ResNode->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + ReplaceUses(MachineNode, ResNode); + IsModified = true; + } + } + if (IsModified) + CurDAG->RemoveDeadNodes(); + } while (IsModified); +} + +void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. 
- if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) return; SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); @@ -1549,8 +2188,8 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { static void initializePassOnce(PassRegistry &Registry) { const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; - PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, - false, false); + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, + nullptr, false, false); Registry.registerPass(*PI, true); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 25a7ca7f59a7..708d36f6f978 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18,6 +18,8 @@ #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -46,20 +48,21 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); -static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { - if (TM.getSubtargetImpl()->isDarwin()) - return new TargetLoweringObjectFileMachO(); +// FIXME: Remove this once the bug has been fixed! +extern cl::opt<bool> ANDIGlueBug; - if (TM.getSubtargetImpl()->isSVR4ABI()) - return new PPC64LinuxTargetObjectFile(); +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + // If it isn't a Mach-O file then it's going to be a linux ELF + // object file. + if (TT.isOSDarwin()) + return new TargetLoweringObjectFileMachO(); - return new TargetLoweringObjectFileELF(); + return new PPC64LinuxTargetObjectFile(); } PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) - : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { - const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); - + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))), + Subtarget(*TM.getSubtargetImpl()) { setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. @@ -68,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - bool isPPC64 = Subtarget->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. @@ -94,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); + if (Subtarget.useCRBits()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (isPPC64 || Subtarget.hasFPCVT()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? 
MVT::i64 : MVT::i32); + } else { + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); + } + + // PowerPC does not support direct load / store of condition registers + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + + // FIXME: Remove this once the ANDI glue bug is fixed: + if (ANDIGlueBug) + setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); + + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i32, MVT::i1, Expand); + setTruncStoreAction(MVT::i16, MVT::i1, Expand); + setTruncStoreAction(MVT::i8, MVT::i1, Expand); + + addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); + } + // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); @@ -139,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!Subtarget->hasFSQRT() && + if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && - Subtarget->hasFRSQRTE() && Subtarget->hasFRE())) + Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); - if (!Subtarget->hasFSQRT() && + if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && - Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) + Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - if (Subtarget->hasFCPSGN()) { + if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { @@ -157,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } - if (Subtarget->hasFPRND()) { + if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); @@ -179,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - if (Subtarget->hasPOPCNTD()) { + if (Subtarget.hasPOPCNTD()) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { @@ -191,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - // PowerPC does not have Select - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + if (!Subtarget.useCRBits()) { + // PowerPC does not have Select + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + } // PowerPC wants to turn select_cc of FP into fsel when possible. 
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit - setOperationAction(ISD::SETCC, MVT::i32, Custom); + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + if (!Subtarget.useCRBits()) + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -256,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (Subtarget->isSVR4ABI()) { + if (Subtarget.isSVR4ABI()) { if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); @@ -276,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } else setOperationAction(ISD::VAARG, MVT::Other, Expand); - if (Subtarget->isSVR4ABI() && !isPPC64) + if (Subtarget.isSVR4ABI() && !isPPC64) // VACOPY is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VACOPY , MVT::Other, Custom); else @@ -309,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (Subtarget->has64BitSupport()) { + if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -319,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) + if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. @@ -327,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } // With the instructions enabled under FPCVT, we can do everything. - if (PPCSubTarget.hasFPCVT()) { - if (Subtarget->has64BitSupport()) { + if (Subtarget.hasFPCVT()) { + if (Subtarget.has64BitSupport()) { setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); @@ -341,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } - if (Subtarget->use64BitRegs()) { + if (Subtarget.use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or @@ -357,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (Subtarget->hasAltivec()) { + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. 
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -413,12 +453,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -445,7 +488,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + setOperationAction(ISD::SELECT, MVT::v4i32, + Subtarget.useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); @@ -464,7 +508,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath) { + if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -484,16 +528,83 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); - setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); + + if (Subtarget.hasVSX()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::MUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i16, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); + + // Share the Altivec comparison restrictions. 
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); + setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); + + setOperationAction(ISD::LOAD, MVT::v2f64, Legal); + setOperationAction(ISD::STORE, MVT::v2f64, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); + + addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); + + addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); + addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); + + // VSX v2i64 only supports non-arithmetic operations. + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SUB, MVT::v2i64, Expand); + + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + setOperationAction(ISD::SRA, MVT::v2i64, Expand); + setOperationAction(ISD::SRL, MVT::v2i64, Expand); + + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); + + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + + // Vector operation legalization checks the result type of + // SIGN_EXTEND_INREG, overall legalization checks the inner type. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + + addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); + } } - if (Subtarget->has64BitSupport()) { + if (Subtarget.has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); } @@ -507,6 +618,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Altivec instructions set fields to all zeros or all ones. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + if (!isPPC64) { + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); + } + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); @@ -522,9 +640,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); + if (Subtarget.useCRBits()) + setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + + if (Subtarget.useCRBits()) { + setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT_CC); + } + // Use reciprocal estimates. if (TM.Options.UnsafeFPMath) { setTargetDAGCombine(ISD::FDIV); @@ -532,7 +662,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } // Darwin long double math library functions have $LDBL128 appended. 
-  if (Subtarget->isDarwin()) {
+  if (Subtarget.isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -545,18 +675,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
   }

+  // With 32 condition bits, we don't need to sink (and duplicate) compares
+  // aggressively in CodeGenPrep.
+  if (Subtarget.useCRBits())
+    setHasMultipleConditionRegisters();
+
   setMinFunctionAlignment(2);
-  if (PPCSubTarget.isDarwin())
+  if (Subtarget.isDarwin())
     setPrefFunctionAlignment(4);

-  if (isPPC64 && Subtarget->isJITCodeModel())
+  if (isPPC64 && Subtarget.isJITCodeModel())
     // Temporary workaround for the inability of PPC64 JIT to handle jump
     // tables.
     setSupportJumpTables(false);

   setInsertFencesForAtomic(true);

-  if (Subtarget->enableMachineScheduler())
+  if (Subtarget.enableMachineScheduler())
     setSchedulingPreference(Sched::Source);
   else
     setSchedulingPreference(Sched::Hybrid);
@@ -565,8 +700,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)

   // The Freescale cores do better with aggressive inlining of memcpy and
   // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
-  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
-      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
     MaxStoresPerMemset = 32;
     MaxStoresPerMemsetOptSize = 16;
     MaxStoresPerMemcpy = 32;
@@ -610,20 +745,20 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
 /// function arguments in the caller parameter area.
 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
   // Darwin passes everything on 4 byte boundary.
-  if (PPCSubTarget.isDarwin())
+  if (Subtarget.isDarwin())
     return 4;

   // 16byte and wider vectors are passed on 16byte boundary.
   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
-  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
-  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
-    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
32 : 16); return Align; } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; @@ -637,7 +772,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE"; case PPCISD::LOAD: return "PPCISD::LOAD"; case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; @@ -670,6 +804,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -688,7 +823,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return MVT::i32; + return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } @@ -717,15 +852,29 @@ static bool isConstantOrUndef(int Op, int Val) { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. -bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { - if (!isUnary) { +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG) { + if (ShuffleKind == 0) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; + for (unsigned i = 0; i != 16; ++i) + if (!isConstantOrUndef(N->getMaskElt(i), i*2)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1; for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+1) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; @@ -733,18 +882,33 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { - if (!isUnary) { +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG) { + if (ShuffleKind == 0) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; + for (unsigned i = 0; i != 16; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || - !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; @@ -754,8 +918,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); + if (N->getValueType(0) != MVT::v16i8) + return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); @@ -771,29 +935,66 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for -/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). +/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { - if (!isUnary) - return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 2) // swapped + return isVMerge(N, UnitSize, 0, 16); + else + return false; + } else { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 0) // normal + return isVMerge(N, UnitSize, 8, 24); + else + return false; + } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for -/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). +/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). +/// The ShuffleKind distinguishes between big-endian merges with two +/// different inputs (0), either-endian merges with two identical inputs (1), +/// and little-endian merges with two different inputs (2). For the latter, +/// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { - if (!isUnary) - return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + unsigned ShuffleKind, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 8, 8); + else if (ShuffleKind == 2) // swapped + return isVMerge(N, UnitSize, 8, 24); + else + return false; + } else { + if (ShuffleKind == 1) // unary + return isVMerge(N, UnitSize, 0, 0); + else if (ShuffleKind == 0) // normal + return isVMerge(N, UnitSize, 0, 16); + else + return false; + } } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getValueType(0) == MVT::v16i8 && - "PPC only supports shuffles by bytes!"); +/// The ShuffleKind distinguishes between big-endian operations with two +/// different inputs (0), either-endian operations with two identical inputs +/// (1), and little-endian operations with two different inputs (2). For the +/// latter, the input operands are swapped (see PPCInstrAltivec.td). +int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG) { + if (N->getValueType(0) != MVT::v16i8) + return -1; ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); @@ -808,19 +1009,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; + ShiftAmt -= i; + bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); - if (!isUnary) { + if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; - } else { + } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; - } + } else + return -1; + + if (ShuffleKind == 2 && isLE) + ShiftAmt = 16 - ShiftAmt; + return ShiftAmt; } @@ -872,10 +1080,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { +unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - return SVOp->getMaskElt(0) / EltSize; + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); + else + return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -883,7 +1095,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. 
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { - SDValue OpVal(0, 0); + SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where @@ -902,7 +1114,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); - if (UniquedVals[i&(Multiple-1)].getNode() == 0) + if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. @@ -917,21 +1129,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { - if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. + if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); } // Finally, check the least significant entry. if (LeadingZero) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) } if (LeadingOnes) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) @@ -944,13 +1156,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) + if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } - if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. + if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; @@ -999,7 +1211,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. static bool isIntS16Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) + if (!isa<ConstantSDNode>(N)) return false; Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); @@ -1038,12 +1250,12 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, // disjoint. APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - LHSKnownZero, LHSKnownOne); + DAG.computeKnownBits(N.getOperand(0), + LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { - DAG.ComputeMaskedBits(N.getOperand(1), - RHSKnownZero, RHSKnownOne); + DAG.computeKnownBits(N.getOperand(1), + RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. 
if (~(LHSKnownZero | RHSKnownZero) == 0) { @@ -1143,12 +1355,18 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); + DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. - Base = N.getOperand(0); + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); + fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } Disp = DAG.getTargetConstant(imm, N.getValueType()); return true; } @@ -1161,7 +1379,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1212,7 +1430,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1303,14 +1521,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, /// GetLabelAccessInfo - Return true if we should reference labels using a /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, - unsigned &LoOpFlags, const GlobalValue *GV = 0) { + unsigned &LoOpFlags, + const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; - // Don't use the pic base if not in PIC relocation model. Or if we are on a - // non-darwin platform. We don't support PIC on other platforms yet. - bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && - TM.getSubtarget<PPCSubtarget>().isDarwin(); + // Don't use the pic base if not in PIC relocation model. + bool isPIC = TM.getRelocationModel() == Reloc::PIC_; + if (isPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; @@ -1358,7 +1576,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1366,6 +1584,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), + PPCII::MO_PIC_FLAG); + SDLoc DL(CP); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + } + SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); SDValue CPILo = @@ -1379,7 +1606,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1387,6 +1614,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + PPCII::MO_PIC_FLAG); + SDLoc DL(GA); + return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + } + SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); @@ -1416,7 +1652,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); @@ -1431,18 +1667,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } - if (!is64bit) - llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, - PtrVT, GOTReg, TGA); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + } else + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, - PtrVT, TGA, TPOffsetHi); + PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } @@ -1506,7 +1743,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1515,6 +1752,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); + if (isPIC && Subtarget.isSVR4ABI()) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + GSDN->getOffset(), + PPCII::MO_PIC_FLAG); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, + DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32)); + } + SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = @@ -1534,6 +1779,27 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDLoc dl(Op); + if (Op.getValueType() == MVT::v2i64) { + // When the operands themselves are v2i64 values, we need to do something + // special because VSX has no underlying comparison operations for these. + if (Op.getOperand(0).getValueType() == MVT::v2i64) { + // Equality can be handled by casting to the legal type for Altivec + // comparisons; everything else needs to be expanded. + if (CC == ISD::SETEQ || CC == ISD::SETNE) { + return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, + DAG.getSetCC(dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), + CC)); + } + + return SDValue(); + } + + // We handle most of these in the usual way. + return Op; + } + // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes.
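For readers following the endian-aware shuffle predicates reworked above (isVPKUHUMShuffleMask, isVPKUWUMShuffleMask, isVMRGLShuffleMask, and friends), here is a standalone C++ sketch of the ShuffleKind 0/1/2 convention applied to a merge-low check. The names isVMerge and isVMRGLMask are local re-implementations for illustration only, not the LLVM entry points, and the sample mask is assumed, not taken from a test.

#include <array>
#include <cstdio>

// Undef mask elements are encoded as -1, as in ShuffleVectorSDNode.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

// Merge UnitSize-byte units, alternating between positions LHSStart (first
// input) and RHSStart (the second input's lanes are numbered 16..31).
static bool isVMerge(const std::array<int, 16> &M, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  for (unsigned i = 0; i != 8 / UnitSize; ++i)
    for (unsigned j = 0; j != UnitSize; ++j)
      if (!isConstantOrUndef(M[i * UnitSize * 2 + j],
                             int(LHSStart + j + i * UnitSize)) ||
          !isConstantOrUndef(M[i * UnitSize * 2 + UnitSize + j],
                             int(RHSStart + j + i * UnitSize)))
        return false;
  return true;
}

// ShuffleKind: 0 = big-endian with two inputs, 1 = unary (either endian),
// 2 = little-endian with two (swapped) inputs, as documented above.
static bool isVMRGLMask(const std::array<int, 16> &M, unsigned UnitSize,
                        unsigned ShuffleKind, bool IsLittleEndian) {
  if (IsLittleEndian) {
    if (ShuffleKind == 1) return isVMerge(M, UnitSize, 0, 0);  // unary
    if (ShuffleKind == 2) return isVMerge(M, UnitSize, 0, 16); // swapped
    return false;
  }
  if (ShuffleKind == 1) return isVMerge(M, UnitSize, 8, 8);  // unary
  if (ShuffleKind == 0) return isVMerge(M, UnitSize, 8, 24); // normal
  return false;
}

int main() {
  // Big-endian vmrglb: interleave bytes 8..15 of both inputs.
  std::array<int, 16> BE = {8, 24, 9, 25, 10, 26, 11, 27,
                            12, 28, 13, 29, 14, 30, 15, 31};
  std::printf("BE two-input (kind 0): %d\n", isVMRGLMask(BE, 1, 0, false));
  std::printf("LE swapped (kind 2):   %d\n", isVMRGLMask(BE, 1, 2, true));
}

The same 16-element mask that qualifies as a big-endian merge-low (prints 1) fails the little-endian swapped check (prints 0), which is exactly why the predicates above consult the data layout before accepting a mask.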
@@ -1727,17 +1993,13 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, - /*isTailCall=*/false, - /*doesNotRet=*/false, - /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__trampoline_setup", PtrVT), + std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -1858,7 +2120,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -1885,7 +2147,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1909,8 +2171,8 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const uint16_t *GetFPR() { - static const uint16_t FPR[] = { +static const MCPhysReg *GetFPR() { + static const MCPhysReg FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -1922,14 +2184,119 @@ static const uint16_t *GetFPR() { /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { - unsigned ArgSize = ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); - ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + + // Round up to multiples of the pointer size, except for array members, + // which are always packed. + if (!Flags.isInConsecutiveRegs()) + ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } +/// CalculateStackSlotAlignment - Calculates the alignment of this argument +/// on the stack. +static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, + ISD::ArgFlagsTy Flags, + unsigned PtrByteSize) { + unsigned Align = PtrByteSize; + + // Altivec parameters are padded to a 16 byte boundary. + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + Align = 16; + + // ByVal parameters are aligned as requested. + if (Flags.isByVal()) { + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > PtrByteSize) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + Align = BVAlign; + } + } + + // Array members are always packed to their original alignment. 
+ if (Flags.isInConsecutiveRegs()) { + // If the array member was split into multiple registers, the first + // needs to be aligned to the size of the full type. (Except for + // ppcf128, which is only aligned as its f64 components.) + if (Flags.isSplit() && OrigVT != MVT::ppcf128) + Align = OrigVT.getStoreSize(); + else + Align = ArgVT.getStoreSize(); + } + + return Align; +} + +/// CalculateStackSlotUsed - Return whether this argument will use its +/// stack slot (instead of being passed in registers). ArgOffset, +/// AvailableFPRs, and AvailableVRs must hold the current argument +/// position, and will be updated to account for this argument. +static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, + ISD::ArgFlagsTy Flags, + unsigned PtrByteSize, + unsigned LinkageSize, + unsigned ParamAreaSize, + unsigned &ArgOffset, + unsigned &AvailableFPRs, + unsigned &AvailableVRs) { + bool UseMemory = false; + + // Respect alignment of argument on the stack. + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; + // If there's no space left in the argument save area, we must + // use memory (this check also catches zero-sized arguments). + if (ArgOffset >= LinkageSize + ParamAreaSize) + UseMemory = true; + + // Allocate argument on the stack. + ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // If we overran the argument save area, we must use memory + // (this check catches arguments passed partially in memory) + if (ArgOffset > LinkageSize + ParamAreaSize) + UseMemory = true; + + // However, if the argument is actually passed in an FPR or a VR, + // we don't use memory after all. + if (!Flags.isByVal()) { + if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (AvailableFPRs > 0) { + --AvailableFPRs; + return false; + } + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + if (AvailableVRs > 0) { + --AvailableVRs; + return false; + } + } + + return UseMemory; +} + +/// EnsureStackAlignment - Round stack frame size up from NumBytes to +/// ensure minimum alignment required for target. +static unsigned EnsureStackAlignment(const TargetMachine &Target, + unsigned NumBytes) { + unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment(); + unsigned AlignMask = TargetAlign - 1; + NumBytes = (NumBytes + AlignMask) & ~AlignMask; + return NumBytes; +} + SDValue PPCTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1938,8 +2305,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - if (PPCSubTarget.isSVR4ABI()) { - if (PPCSubTarget.isPPC64()) + if (Subtarget.isSVR4ABI()) { + if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); else @@ -2005,7 +2372,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. 
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false); + CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); @@ -2020,6 +2388,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); + case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; @@ -2027,7 +2396,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = &PPC::F8RCRegClass; + if (Subtarget.hasVSX()) + RC = &PPC::VSFRCRegClass; + else + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: @@ -2035,18 +2407,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v2f64: + case MVT::v2i64: + RC = &PPC::VSHRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + ValVT == MVT::i1 ? MVT::i32 : ValVT); + + if (ValVT == MVT::i1) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable); @@ -2072,36 +2452,27 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(false, false)); - - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - - FI->setMinReservedArea(MinReservedArea); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); SmallVector<SDValue, 8> MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
if (isVarArg) { - static const uint16_t GPArgRegs[] = { + static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const uint16_t FPArgRegs[] = { + static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -2163,8 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } @@ -2182,33 +2552,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); -} - -// Set the size that is at least reserved in caller of this function. Tail -// call optimized functions' reserved stack space needs to be aligned so that -// taking the difference between two stack areas will result in an aligned -// stack. -void -PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, - unsigned nAltivecParamsAtEnd, - unsigned MinReservedArea, - bool isPPC64) const { - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - // Add the Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += 16*nAltivecParamsAtEnd; - } - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - unsigned TargetAlign - = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - FI->setMinReservedArea(MinReservedArea); + return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue @@ -2221,6 +2565,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // + bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); @@ -2231,63 +2577,75 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); - // Area that is at least reserved in caller of this function. 
- unsigned MinReservedArea = ArgOffset; + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const MCPhysReg VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); - unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + // Do a first pass over the arguments to determine whether the ABI + // guarantees that our caller has allocated the parameter save area + // on its stack frame. In the ELFv1 ABI, this is always the case; + // in the ELFv2 ABI, it is true if this is a vararg function or if + // any parameter is located in a stack slot. + + bool HasParameterArea = !isELFv2ABI || isVarArg; + unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; + unsigned NumBytes = LinkageSize; + unsigned AvailableFPRs = Num_FPR_Regs; + unsigned AvailableVRs = Num_VR_Regs; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytes, AvailableFPRs, AvailableVRs)) + HasParameterArea = true; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. + unsigned ArgOffset = LinkageSize; + unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; SmallVector<SDValue, 8> MemOps; - unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; + EVT OrigVT = Ins[ArgNo].ArgVT; + unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); CurArgIdx = Ins[ArgNo].OrigArgIndex; + /* Respect alignment of argument on the stack. */ + unsigned Align = + CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; unsigned CurArgOffset = ArgOffset; - // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. - if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || - ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { - if (isVarArg) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += CalculateStackSlotSize(ObjectVT, - Flags, - PtrByteSize); - } else - nAltivecParamsAtEnd++; - } else - // Calculate min reserved area. - MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, - Flags, - PtrByteSize); + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, Num_GPR_Regs); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. 
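The parameter-save-area bookkeeping introduced above (CalculateStackSlotAlignment, CalculateStackSlotUsed, EnsureStackAlignment) repeatedly rounds an offset up to an alignment boundary. A minimal standalone sketch of that arithmetic follows, assuming illustrative ELFv2 numbers (8-byte pointers, a 32-byte linkage area) rather than values queried from a real TargetMachine:

#include <cassert>
#include <cstdio>

// Round Offset up to the next multiple of Align (a power of two); the same
// arithmetic as ((Offset + Align - 1) / Align) * Align in the code above.
static unsigned alignTo(unsigned Offset, unsigned Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of two expected");
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  const unsigned LinkageSize = 32;      // assumed ELFv2 linkage area
  unsigned ArgOffset = LinkageSize + 4; // pretend a 4-byte field was packed

  // An Altivec/VSX argument is first padded to a 16-byte boundary...
  ArgOffset = alignTo(ArgOffset, 16);
  std::printf("vector arg offset: %u\n", ArgOffset); // 48

  // ...and the finished frame is rounded up to the target stack alignment,
  // as EnsureStackAlignment does via getStackAlignment().
  unsigned NumBytes = alignTo(ArgOffset + 16, 16);
  std::printf("frame size: %u\n", NumBytes); // 64
}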
@@ -2309,21 +2667,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( continue; } - unsigned BVAlign = Flags.getByValAlign(); - if (BVAlign > 8) { - ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; - CurArgOffset = ArgOffset; - } - - // All aggregates smaller than 8 bytes must be passed right-justified. - if (ObjSize < PtrByteSize) - CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); - // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + // Create a stack object covering all stack doublewords occupied + // by the argument. If the argument is (fully or partially) on + // the stack, or if the argument is fully in registers but the + // caller has allocated the parameter save anyway, we can refer + // directly to the caller's stack frame. Otherwise, create a + // local copy in our own frame. + int FI; + if (HasParameterArea || + ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) + FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true); + else + FI = MFI->CreateStackObject(ArgSize, Align, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(FIN); - if (ObjSize < 8) { + // Handle aggregates smaller than 8 bytes. + if (ObjSize < PtrByteSize) { + // The value of the object is its address, which differs from the + // address of the enclosing doubleword on big-endian systems. + SDValue Arg = FIN; + if (!isLittleEndian) { + SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT); + Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); + } + InVals.push_back(Arg); + if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); @@ -2332,25 +2700,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); - Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(FuncArg), ObjType, false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area - // slot. The address of the parameter was already calculated - // above (InVals.push_back(FIN)) to be the right-justified - // offset within the slot. For this store, we need a new - // frame index that points at the beginning of the slot. - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(FuncArg), false, false, 0); } MemOps.push_back(Store); - ++GPR_idx; } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. @@ -2358,44 +2720,48 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( continue; } + // The value of the object is its address, which is the address of + // its first stack doubleword. + InVals.push_back(FIN); + + // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { - // Store whatever pieces of the object are in registers - // to memory. ArgOffset will be the address of the beginning - // of the object. 
- if (GPR_idx != Num_GPR_Regs) { - unsigned VReg; - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, j), - false, false, 0); - MemOps.push_back(Store); - ++GPR_idx; - ArgOffset += PtrByteSize; - } else { - ArgOffset += ArgSize - j; + if (GPR_idx == Num_GPR_Regs) break; + + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Addr = FIN; + if (j) { + SDValue Off = DAG.getConstant(j, PtrVT); + Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(FuncArg, j), + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; } + ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::i64: + // These can be scalar arguments or elements of an integer array type + // passed directly. Clang may use those instead of "byval" aggregate + // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); - - ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; @@ -2405,63 +2771,76 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::f32: case MVT::f64: - // Every 8 bytes of argument space consumes one of the GPRs available for - // argument passing. - if (GPR_idx != Num_GPR_Regs) { - ++GPR_idx; - } + // These can be scalar arguments or elements of a float array type + // passed directly. The latter are used to implement ELFv2 homogenous + // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? + &PPC::VSFRCRegClass : + &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; + } else if (GPR_idx != Num_GPR_Regs) { + // This can only ever happen in the presence of f32 array types, + // since otherwise we never run out of FPRs before running out + // of GPRs. + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::f32) { + if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) + ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, + DAG.getConstant(32, MVT::i32)); + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); + } + + ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { needsLoad = true; - ArgSize = PtrByteSize; } - ArgOffset += 8; + // When passing an array of floats, the array occupies consecutive + // space in the argument area; only round up to the next doubleword + // at the end of the array. Otherwise, each float takes 8 bytes. 
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; + ArgOffset += ArgSize; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: - // Note that vector arguments in registers don't reserve stack space, - // except in varargs functions. + case MVT::v2f64: + case MVT::v2i64: + // These can be scalar arguments or elements of a vector array type + // passed directly. The latter are used to implement ELFv2 homogenous + // vector aggregates. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? + MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : + MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); - if (isVarArg) { - while ((ArgOffset % 16) != 0) { - ArgOffset += PtrByteSize; - if (GPR_idx != Num_GPR_Regs) - GPR_idx++; - } - ArgOffset += 16; - GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? - } ++VR_idx; } else { - // Vectors are aligned. - ArgOffset = ((ArgOffset+15)/16)*16; - CurArgOffset = ArgOffset; - ArgOffset += 16; needsLoad = true; } + ArgOffset += 16; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { - int FI = MFI->CreateFixedObject(ObjSize, - CurArgOffset + (ArgSize - ObjSize), - isImmutable); + if (ObjSize < ArgSize && !isLittleEndian) + CurArgOffset += ArgSize - ObjSize; + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), false, false, false, 0); @@ -2470,11 +2849,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(ArgVal); } + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea; + if (HasParameterArea) + MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); + else + MinReservedArea = LinkageSize; + // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -2488,7 +2875,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing the // result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { + for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -2501,8 +2889,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } @@ -2528,22 +2915,24 @@ PPCTargetLowering::LowerFormalArguments_Darwin( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, + false); + unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -2554,7 +2943,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -2581,6 +2970,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch(ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: case MVT::f32: VecArgOffset += 4; @@ -2704,11 +3094,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin( switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); + case MVT::i1: case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + + if (ObjectVT == MVT::i1) + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); + ++GPR_idx; } else { needsLoad = true; @@ -2724,7 +3119,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); - if (ObjectVT == MVT::i32) + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); @@ -2813,11 +3208,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(ArgVal); } + // Allow for Altivec parameters at the end, if needed. 
+ if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + + // Area that is at least reserved in the caller of this function. + MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); + // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -2851,80 +3256,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } -/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus -/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. -static unsigned -CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, - bool isPPC64, - bool isVarArg, - unsigned CC, - const SmallVectorImpl<ISD::OutputArg> - &Outs, - const SmallVectorImpl<SDValue> &OutVals, - unsigned &nAltivecParamsAtEnd) { - // Count how many bytes are to be pushed on the stack, including the linkage - // area, and parameter passing area. We start with 24/48 bytes, which is - // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); - unsigned NumOps = Outs.size(); - unsigned PtrByteSize = isPPC64 ? 8 : 4; - - // Add up all the space actually used. - // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually - // they all go in registers, but we must reserve stack space for them for - // possible use by the caller. In varargs or 64-bit calls, parameters are - // assigned stack space in order, with padding so Altivec parameters are - // 16-byte aligned. - nAltivecParamsAtEnd = 0; - for (unsigned i = 0; i != NumOps; ++i) { - ISD::ArgFlagsTy Flags = Outs[i].Flags; - EVT ArgVT = Outs[i].VT; - // Varargs Altivec parameters are padded to a 16 byte boundary. - if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || - ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { - if (!isVarArg && !isPPC64) { - // Non-varargs Altivec parameters go after all the non-Altivec - // parameters; handle those later so we know how much padding we need. - nAltivecParamsAtEnd++; - continue; - } - // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. - NumBytes = ((NumBytes+15)/16)*16; - } - NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); - } - - // Allow for Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - NumBytes = ((NumBytes+15)/16)*16; - NumBytes += 16*nAltivecParamsAtEnd; - } - - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. 
- NumBytes = std::max(NumBytes, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - - // Tail call needs the stack to be aligned. - if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ - unsigned TargetAlign = DAG.getMachineFunction().getTarget(). - getFrameLowering()->getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - NumBytes = (NumBytes + AlignMask) & ~AlignMask; - } - - return NumBytes; -} - /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, @@ -2967,7 +3303,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Flags.isByVal()) return false; } - // Non PIC/GOT tail calls are supported. + // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; @@ -2985,12 +3321,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); - if (!C) return 0; + if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) - return 0; // Top 6 bits have to be sext of immediate. + return nullptr; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, DAG.getTargetLoweringInfo().getPointerTy()).getNode(); @@ -3096,7 +3432,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. - EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; + EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), false, false, false, 0); @@ -3126,8 +3462,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(0), - MachinePointerInfo(0)); + false, false, MachinePointerInfo(), + MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of @@ -3172,8 +3508,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. 
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, @@ -3190,10 +3525,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, - const PPCSubtarget &PPCSubTarget) { + const PPCSubtarget &Subtarget) { - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isELFv2ABI = Subtarget.isELFv2ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain @@ -3202,11 +3538,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } + if (!isSVR4ABI || !isPPC64) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + needIndirectCall = false; + } if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 @@ -3214,15 +3551,18 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // far-call stubs may be outside relocation limits for a BL instruction. if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { unsigned OpFlags = 0; - if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (PPCSubTarget.getTargetTriple().isMacOSX() && - PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) { + G->getGlobal()->isWeakForLinker())) || + (Subtarget.isTargetELF() && !isPPC64 && + !G->getGlobal()->hasLocalLinkage() && + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. - OpFlags = PPCII::MO_DARWIN_STUB; + OpFlags = PPCII::MO_PLT_OR_STUB; } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, @@ -3238,13 +3578,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; - if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (PPCSubTarget.getTargetTriple().isMacOSX() && - PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || + (Subtarget.isTargetELF() && !isPPC64 && + DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. 
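
Both call paths in PrepareCall now ask the same question: does this callee need a PLT or stub indirection? A boolean distillation of the rewritten condition, with stand-in types rather than LLVM's:

enum class RelocModel { Static, PIC };

struct CalleeTraits {
  bool IsDeclarationOrWeak; // may be resolved outside this module
  bool HasLocalLinkage;     // guaranteed to bind locally
};

// PC-relative references to possibly-external symbols go through a stub on
// pre-10.5 Darwin linkers, and through the PLT for 32-bit ELF PIC code.
static bool needsPLTOrStub(RelocModel RM, bool OldDarwinLinker, bool IsELF32,
                           const CalleeTraits &G) {
  if (RM != RelocModel::Static && OldDarwinLinker && G.IsDeclarationOrWeak)
    return true;
  if (IsELF32 && RM == RelocModel::PIC && !G.HasLocalLinkage)
    return true;
  return false;
}
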
- OpFlags = PPCII::MO_DARWIN_STUB; + OpFlags = PPCII::MO_PLT_OR_STUB; } Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), @@ -3257,7 +3599,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64) { + if (isSVR4ABI && isPPC64 && !isELFv2ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). @@ -3287,8 +3629,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Load the address of the function entry point from the function // descriptor. SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); - SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, - InFlag.getNode() ? 3 : 2); + SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, + makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); Chain = LoadFuncPtr.getValue(1); InFlag = LoadFuncPtr.getValue(2); @@ -3314,8 +3656,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // additional register being allocated and an unnecessary move instruction // being generated. VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue TOCOff = DAG.getIntPtrConstant(8); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, - Callee, InFlag); + AddTOC, InFlag); Chain = LoadTOCPtr.getValue(0); InFlag = LoadTOCPtr.getValue(1); @@ -3324,8 +3668,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, MTCTROps[2] = InFlag; } - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, + makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); InFlag = Chain.getValue(1); NodeTys.clear(); @@ -3333,9 +3677,9 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = PPCISD::BCTRL; - Callee.setNode(0); + Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64) + if (isSVR4ABI && isPPC64 && !isELFv2ABI) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -3357,6 +3701,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Direct calls in the ELFv2 ABI need the TOC register live into the call. 
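
The offsets 0 and 8 used by the indirect-call sequence come from the ELFv1 function-descriptor layout; a sketch of the shape being dereferenced (the struct itself is illustrative):

#include <cstddef>
#include <cstdint>

struct FunctionDescriptor {
  uint64_t EntryPoint; // loaded into CTR                    (offset 0)
  uint64_t TOCBase;    // loaded into r2 via LOAD_TOC        (offset 8)
  uint64_t EnvPtr;     // environment pointer, passed in r11 (offset 16)
};

static_assert(offsetof(FunctionDescriptor, TOCBase) == 8,
              "the AddTOC node above adds exactly 8 to the descriptor");
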
+ if (Callee.getNode() && isELFv2ABI) + Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); + return CallOpc; } @@ -3426,14 +3774,16 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals) const { + + bool isELFv2ABI = Subtarget.isELFv2ABI(); std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, isTailCall, RegsToPass, Ops, NodeTys, - PPCSubTarget); + Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls - if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); // When performing tail call optimization the callee pops its arguments off @@ -3461,7 +3811,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, isa<ConstantSDNode>(Callee)) && "Expecting an global address, external symbol, absolute value or register"); - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); + return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } // Add a NOP immediately after the branch instruction when using the 64-bit @@ -3474,7 +3824,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // same TOC), the NOP will remain unchanged. bool needsTOCRestore = false; - if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { + if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -3494,12 +3844,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, } } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); if (needsTOCRestore) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); InFlag = Chain.getValue(1); } @@ -3531,8 +3886,12 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); - if (PPCSubTarget.isSVR4ABI()) { - if (PPCSubTarget.isPPC64()) + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + + if (Subtarget.isSVR4ABI()) { + if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); @@ -3585,7 +3944,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. - CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false), + PtrByteSize); if (isVarArg) { // Handle fixed and variable vector arguments differently. 
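
For reference, the numbers behind getLinkageSize and getTOCSaveOffset as the new ELF code uses them, in a two-line sketch (Darwin keeps its own 24/48-byte variant):

// ELFv1 linkage area: [SP][CR][LR][compiler dword][linker dword][TOC]
// ELFv2 linkage area: [SP][CR][LR][TOC]
static unsigned linkageSize(bool IsELFv2) { return IsELFv2 ? 32 : 48; }
static unsigned tocSaveOffset(bool IsELFv2) { return IsELFv2 ? 24 : 40; }
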
@@ -3611,7 +3971,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, errs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } else { @@ -3705,6 +4065,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (VA.isRegLoc()) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); + seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -3729,8 +4092,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -3748,7 +4110,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, SDValue Ops[] = { Chain, InFlag }; Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, - dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); InFlag = Chain.getValue(1); } @@ -3792,6 +4154,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3808,16 +4172,44 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); - unsigned nAltivecParamsAtEnd = 0; - // Count how many bytes are to be pushed on the stack, including the linkage - // area, and parameter passing area. We start with at least 48 bytes, which - // is reserved space for [SP][CR][LR][3 x unused]. - // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result - // of this call. - unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, - Outs, OutVals, nAltivecParamsAtEnd); + // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes + // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage + // area is 32 bytes reserved space for [SP][CR][LR][TOC]. + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false, + isELFv2ABI); + unsigned NumBytes = LinkageSize; + + // Add up all the space actually used. + for (unsigned i = 0; i != NumOps; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + EVT OrigVT = Outs[i].ArgVT; + + /* Respect alignment of argument on the stack. */ + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); + NumBytes = ((NumBytes + Align - 1) / Align) * Align; + + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + if (Flags.isInConsecutiveRegsLast()) + NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + } + + unsigned NumBytesActuallyUsed = NumBytes; + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. 
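
The sizing loop that replaces CalculateParameterAndLinkageAreaSize can be modeled in isolation; a sketch under the assumption that each argument is reduced to a slot size, an alignment, and an end-of-array flag (ArgDesc stands in for ISD::OutputArg):

#include <algorithm>
#include <cstdint>
#include <vector>

struct ArgDesc {
  uint64_t Size;    // stack-slot size in bytes
  uint64_t Align;   // required alignment (power of two)
  bool LastOfArray; // Flags.isInConsecutiveRegsLast()
};

// Align each slot, add its size, round arrays up to a doubleword at their
// end, then apply the 8-GPR back-store floor described in the comment above.
static uint64_t parameterAreaSize(const std::vector<ArgDesc> &Args,
                                  uint64_t LinkageSize, uint64_t PtrByteSize) {
  uint64_t NumBytes = LinkageSize;
  for (const ArgDesc &A : Args) {
    NumBytes = (NumBytes + A.Align - 1) / A.Align * A.Align;
    NumBytes += A.Size;
    if (A.LastOfArray)
      NumBytes = (NumBytes + PtrByteSize - 1) / PtrByteSize * PtrByteSize;
  }
  return std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
}
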
+ // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + + // Tail call needs the stack to be aligned. + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) + NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -3849,19 +4241,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); - unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned ArgOffset = LinkageSize; + unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; + static const MCPhysReg VSRH[] = { + PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, + PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 + }; + const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); @@ -3873,6 +4270,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + EVT OrigVT = Outs[i].ArgVT; + + /* Respect alignment of argument on the stack. */ + unsigned Align = + CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; + + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, NumGPRs); // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. @@ -3883,7 +4291,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // Promote integers to 64-bit values. - if (Arg.getValueType() == MVT::i32) { + if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); @@ -3905,15 +4313,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; - unsigned BVAlign = Flags.getByValAlign(); - if (BVAlign > 8) { - if (BVAlign % PtrByteSize != 0) - llvm_unreachable( - "ByVal alignment is not a multiple of the pointer size"); - - ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; - } - // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? 
MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -3922,7 +4321,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); ArgOffset += PtrByteSize; continue; @@ -3930,9 +4329,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, } if (GPR_idx == NumGPRs && Size < 8) { - SDValue Const = DAG.getConstant(PtrByteSize - Size, - PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -3967,8 +4369,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. - SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -3978,7 +4383,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); // Done with this argument. ArgOffset += PtrByteSize; @@ -4007,10 +4412,14 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: + // These can be scalar arguments or elements of an integer array type + // passed directly. Clang may use those instead of "byval" aggregate + // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != NumGPRs) { - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, @@ -4019,40 +4428,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; break; case MVT::f32: - case MVT::f64: - if (FPR_idx != NumFPRs) { + case MVT::f64: { + // These can be scalar arguments or elements of a float array type + // passed directly. The latter are used to implement ELFv2 homogenous + // float aggregates. + + // Named arguments go into FPRs first, and once they overflow, the + // remaining arguments go into GPRs and then the parameter save area. + // Unnamed arguments for vararg functions always go to GPRs and + // then the parameter save area. For now, put all arguments to vararg + // routines always in both locations (FPR *and* GPR or stack slot). + bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; + + // First load the argument into the next available FPR. 
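
Because aligned arguments can now skip parameter-save-area slots, the GPR for an argument is derived from its offset rather than tracked with a running counter; a sketch of that mapping (NumGPRs is 8 for r3..r10):

#include <algorithm>

static unsigned gprIndexForOffset(unsigned ArgOffset, unsigned LinkageSize,
                                  unsigned PtrByteSize, unsigned NumGPRs) {
  unsigned Idx = (ArgOffset - LinkageSize) / PtrByteSize;
  return std::min(Idx, NumGPRs); // == NumGPRs means "no GPR left"
}
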
+ if (FPR_idx != NumFPRs) RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); - if (isVarArg) { - // A single float or an aggregate containing only a single float - // must be passed right-justified in the stack doubleword, and - // in the GPR, if one is available. - SDValue StoreOff; - if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { - SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); - StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - } else - StoreOff = PtrOff; - - SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, - MachinePointerInfo(), false, false, 0); - MemOpChains.push_back(Store); - - // Float varargs are always shadowed in available integer registers - if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), false, false, - false, 0); - MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - } - } else if (GPR_idx != NumGPRs) - // If we have any FPRs remaining, we may also have GPRs remaining. - ++GPR_idx; + // Next, load the argument into GPR or stack slot if needed. + if (!NeedGPROrStack) + ; + else if (GPR_idx != NumGPRs) { + // In the non-vararg case, this can only ever happen in the + // presence of f32 array types, since otherwise we never run + // out of FPRs before running out of GPRs. + SDValue ArgVal; + + // Double values are always passed in a single GPR. + if (Arg.getValueType() != MVT::f32) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); + + // Non-array float values are extended and passed in a GPR. + } else if (!Flags.isInConsecutiveRegs()) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); + + // If we have an array of floats, we collect every odd element + // together with its predecessor into one GPR. + } else if (ArgOffset % PtrByteSize != 0) { + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); + Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + if (!isLittleEndian) + std::swap(Lo, Hi); + ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); + + // The final element, if even, goes into the first half of a GPR. + } else if (Flags.isInConsecutiveRegsLast()) { + ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); + if (!isLittleEndian) + ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, + DAG.getConstant(32, MVT::i32)); + + // Non-final even elements are skipped; they will be handled + // together the with subsequent argument on the next go-around. + } else + ArgVal = SDValue(); + + if (ArgVal.getNode()) + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal)); } else { // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. - if (Arg.getValueType() == MVT::f32) { + if (Arg.getValueType() == MVT::f32 && + !isLittleEndian && !Flags.isInConsecutiveRegs()) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } @@ -4061,27 +4500,32 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, true, isTailCall, false, MemOpChains, TailCallArguments, dl); } - ArgOffset += 8; + // When passing an array of floats, the array occupies consecutive + // space in the argument area; only round up to the next doubleword + // at the end of the array. Otherwise, each float takes 8 bytes. 
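
The "array of floats" path above pairs two f32 elements into one GPR via BUILD_PAIR; the endian handling reduces to which half the earlier element occupies. A sketch over raw bit patterns (function name is illustrative):

#include <cstdint>
#include <utility>

// The earlier f32 array element lands in the low half of the doubleword on
// little-endian and in the high half on big-endian.
static uint64_t packF32Pair(uint32_t EarlierBits, uint32_t LaterBits,
                            bool IsLittleEndian) {
  uint64_t Lo = EarlierBits, Hi = LaterBits;
  if (!IsLittleEndian)
    std::swap(Lo, Hi);
  return (Hi << 32) | Lo;
}
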
+ ArgOffset += (Arg.getValueType() == MVT::f32 && + Flags.isInConsecutiveRegs()) ? 4 : 8; + if (Flags.isInConsecutiveRegsLast()) + ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; break; + } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: + // These can be scalar arguments or elements of a vector array type + // passed directly. The latter are used to implement ELFv2 homogenous + // vector aggregates. + + // For a varargs call, named arguments go into VRs or on the stack as + // usual; unnamed arguments always go to the stack or the corresponding + // GPRs when within range. For now, we always put the value in both + // locations (or even all three). if (isVarArg) { - // These go aligned on the stack, or in the corresponding R registers - // when within range. The Darwin PPC ABI doc claims they also go in - // V registers; in fact gcc does this only for arguments that are - // prototyped, not for those that match the ... We do it for all - // arguments, seems to work. - while (ArgOffset % 16 !=0) { - ArgOffset += PtrByteSize; - if (GPR_idx != NumGPRs) - GPR_idx++; - } // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. - PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, - DAG.getConstant(ArgOffset, PtrVT)); SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Store); @@ -4090,7 +4534,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? + VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { @@ -4106,43 +4556,49 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, break; } - // Non-varargs Altivec params generally go in registers, but have - // stack space allocated at the end. + // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { - // Doesn't have GPR space allocated. - RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || + Arg.getSimpleValueType() == MVT::v2i64) ? + VSRH[VR_idx] : VR[VR_idx]; + ++VR_idx; + + RegsToPass.push_back(std::make_pair(VReg, Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); - ArgOffset += 16; } + ArgOffset += 16; break; } } + assert(NumBytesActuallyUsed == ArgOffset); + (void)NumBytesActuallyUsed; + if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !dyn_cast<GlobalAddressSDNode>(Callee) && - !dyn_cast<ExternalSymbolSDNode>(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { + !dyn_cast<ExternalSymbolSDNode>(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. 
- SDValue PtrOff = DAG.getIntPtrConstant(40); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); - // R12 must contain the address of an indirect callee. This does not - // mean the MTCTR instruction must use R12; it's easier to model this - // as an extra parameter, so do that. - RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); + // In the ELFv2 ABI, R12 must contain the address of an indirect callee. + // This does not mean the MTCTR instruction must use R12; it's easier + // to model this as an extra parameter, so do that. + if (isELFv2ABI) + RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -4190,15 +4646,56 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); - unsigned nAltivecParamsAtEnd = 0; - // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, - Outs, OutVals, - nAltivecParamsAtEnd); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, + false); + unsigned NumBytes = LinkageSize; + + // Add up all the space actually used. + // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually + // they all go in registers, but we must reserve stack space for them for + // possible use by the caller. In varargs or 64-bit calls, parameters are + // assigned stack space in order, with padding so Altivec parameters are + // 16-byte aligned. + unsigned nAltivecParamsAtEnd = 0; + for (unsigned i = 0; i != NumOps; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + // Varargs Altivec parameters are padded to a 16 byte boundary. + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { + if (!isVarArg && !isPPC64) { + // Non-varargs Altivec parameters go after all the non-Altivec + // parameters; handle those later so we know how much padding we need. + nAltivecParamsAtEnd++; + continue; + } + // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. + NumBytes = ((NumBytes+15)/16)*16; + } + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + } + + // Allow for Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + NumBytes = ((NumBytes+15)/16)*16; + NumBytes += 16*nAltivecParamsAtEnd; + } + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + + // Tail call needs the stack to be aligned. 
+ if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) + NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -4234,20 +4731,20 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -4255,7 +4752,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; @@ -4338,9 +4835,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, switch (Arg.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i1: case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { + if (Arg.getValueType() == MVT::i1) + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -4481,8 +4982,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as @@ -4570,8 +5070,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, @@ -4609,8 +5108,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. 
The users of this index will be @@ -4633,8 +5132,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -4674,7 +5173,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Build a DYNALLOC node. SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); - return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); + return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); } SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -4692,6 +5191,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, Op.getOperand(0), Op.getOperand(1)); } +SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 loads"); + + // First, load 8 bits into 32 bits, then truncate to 1 bit. + + SDLoc dl(Op); + LoadSDNode *LD = cast<LoadSDNode>(Op); + + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand *MMO = LD->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, + BasePtr, MVT::i8, MMO); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); + + SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; + return DAG.getMergeValues(Ops, dl); +} + +SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(1).getValueType() == MVT::i1 && + "Custom lowering only for i1 stores"); + + // First, zero extend to 32 bits, then use a truncating store to 8 bits. + + SDLoc dl(Op); + StoreSDNode *ST = cast<StoreSDNode>(Op); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + MachineMemOperand *MMO = ST->getMemOperand(); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); +} + +// FIXME: Remove this once the ANDI glue bug is fixed: +SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && + "Custom lowering only for i1 results"); + + SDLoc DL(Op); + return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, + Op.getOperand(0)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4805,12 +5353,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : + (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : PPCISD::FCTIDUZ, @@ -4819,8 +5367,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, } // Convert the FP value to an int value through memory. - bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && - (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); @@ -4833,8 +5381,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, - DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), - MVT::i32, MMO); + DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, false, false, 0); @@ -4858,17 +5405,22 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + if (Op.getOperand(0).getValueType() == MVT::i1) + return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), + DAG.getConstantFP(1.0, Op.getValueType()), + DAG.getConstantFP(0.0, Op.getValueType())); + + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. - unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); - MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; if (Op.getOperand(0).getValueType() == MVT::i64) { @@ -4884,7 +5436,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && - !PPCSubTarget.hasFPCVT() && + !Subtarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. 
(If this @@ -4922,7 +5474,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); - if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; @@ -4939,7 +5491,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Ld; - if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { + if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -4956,9 +5508,9 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), - Ops, 2, MVT::i32, MMO); + Ops, MVT::i32, MMO); } else { - assert(PPCSubTarget.isPPC64() && + assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); @@ -4980,7 +5532,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // FCFID it and return it. SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); - if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -5010,14 +5562,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue MFFSreg, InFlag; // Save FP Control Word to register EVT NodeTys[] = { MVT::f64, // return register MVT::Glue // unused in this context }; - SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); + SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); @@ -5076,7 +5627,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { @@ -5105,7 +5656,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { @@ -5134,7 +5685,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } //===----------------------------------------------------------------------===// @@ -5163,8 +5714,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SDValue Elt = 
DAG.getConstant(Val, MVT::i32); SmallVector<SDValue, 8> Ops; Ops.assign(CanonicalVT.getVectorNumElements(), Elt); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, - &Ops[0], Ops.size()); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); } @@ -5223,7 +5773,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); - assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -5271,10 +5821,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, MVT::i32); - EVT VT = Op.getValueType(); - int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); - SDValue EltSize = DAG.getConstant(Size, MVT::i32); - return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + EVT VT = (SplatSize == 1 ? MVT::v16i8 : + (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); + SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); + SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + if (VT == Op.getValueType()) + return RetVal; + else + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -5293,6 +5847,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } + // The remaining cases assume either big endian element order or + // a splat-size that equates to the element size of the vector + // to be built. An example that doesn't work for little endian is + // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits + // and a vector element size of 16 bits. The code below will + // produce the vector in big endian element order, which for little + // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. + + // For now, just avoid these optimizations in that case. + // FIXME: Develop correct optimizations for LE with mismatched + // splat and element sizes. + + if (Subtarget.isLittleEndian() && + SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) + return SDValue(); + // Check to see if this is a wide variety of vsplti*, binop self cases. 
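
The mismatched splat/element-size problem the new early-out avoids can be demonstrated on a host: a 32-bit splat of 0x0000FFFF is the v8i16 vector {0,-1,0,-1,...} only under big-endian lane numbering. A standalone sketch:

#include <cstdint>
#include <cstring>

// Returns true when the first 16-bit lane of the 32-bit splat reads as 0,
// i.e. on a big-endian host; a little-endian host sees {-1,0,...} instead.
static bool splatReadsAsBigEndianPattern() {
  uint32_t Word = 0x0000FFFFu;
  uint16_t Halves[2];
  std::memcpy(Halves, &Word, sizeof(Halves));
  return Halves[0] == 0;
}
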
static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -5461,6 +6031,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); EVT VT = Op.getValueType(); + bool isLittleEndian = Subtarget.isLittleEndian(); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be @@ -5469,15 +6040,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || - PPC::isVPKUWUMShuffleMask(SVOp, true) || - PPC::isVPKUHUMShuffleMask(SVOp, true) || - PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, true) || - PPC::isVMRGLShuffleMask(SVOp, 2, true) || - PPC::isVMRGLShuffleMask(SVOp, 4, true) || - PPC::isVMRGHShuffleMask(SVOp, 1, true) || - PPC::isVMRGHShuffleMask(SVOp, 2, true) || - PPC::isVMRGHShuffleMask(SVOp, 4, true)) { + PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { return Op; } } @@ -5485,15 +6056,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. - if (PPC::isVPKUWUMShuffleMask(SVOp, false) || - PPC::isVPKUHUMShuffleMask(SVOp, false) || - PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, false) || - PPC::isVMRGLShuffleMask(SVOp, 2, false) || - PPC::isVMRGLShuffleMask(SVOp, 4, false) || - PPC::isVMRGHShuffleMask(SVOp, 1, false) || - PPC::isVMRGHShuffleMask(SVOp, 2, false) || - PPC::isVMRGHShuffleMask(SVOp, 4, false)) + unsigned int ShuffleKind = isLittleEndian ? 2 : 0; + if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -5527,7 +6099,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. - if (isFourElementShuffle) { + // For now, we skip this for little endian until such time as we have a + // little-endian perfect shuffle table. + if (isFourElementShuffle && !isLittleEndian) { // Compute the index in the perfect shuffle table. 
unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; @@ -5556,6 +6130,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. + + // For little endian, the order of the input vectors is reversed, and + // the permutation mask is complemented with respect to 31. This is + // necessary to produce proper semantics with the big-endian-biased vperm + // instruction. EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; @@ -5564,13 +6143,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i32)); + if (isLittleEndian) + ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), + MVT::i32)); + else + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i32)); } SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, - &ResultMask[0], ResultMask.size()); - return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); + ResultMask); + if (isLittleEndian) + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), + V2, V1, VPermMask); + else + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), + V1, V2, VPermMask); } /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an @@ -5644,7 +6232,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getConstant(CompareOpc, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. @@ -5685,6 +6273,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int + // instructions), but for smaller types, we need to first extend up to v2i32 + // before doing going farther. + if (Op.getValueType() == MVT::v2i64) { + EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + if (ExtVT != MVT::v2i32) { + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op, + DAG.getValueType(EVT::getVectorVT(*DAG.getContext(), + ExtVT.getVectorElementType(), 4))); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op); + Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op, + DAG.getValueType(MVT::v2i32)); + } + + return Op; + } + + return SDValue(); +} + SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -5739,6 +6351,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. 
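
The little-endian VPERM fix above amounts to two moves: swap the input vectors and complement every byte selector against 31, so the big-endian-biased hardware numbering still picks the intended bytes. The selector arithmetic in isolation (a sketch, not LLVM API):

#include <cstdint>

static uint8_t lePermSelector(unsigned SrcElt, unsigned BytesPerElement,
                              unsigned ByteInElement) {
  return uint8_t(31 - (SrcElt * BytesPerElement + ByteInElement));
}
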
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, @@ -5750,13 +6363,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); - // Merge the results together. + // Merge the results together. Because vmuleub and vmuloub are + // instructions with a big-endian bias, we must reverse the + // element numbering and reverse the meaning of "odd" and "even" + // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = 2*i+1; - Ops[i*2+1] = 2*i+1+16; + if (isLittleEndian) { + Ops[i*2 ] = 2*i; + Ops[i*2+1] = 2*i+16; + } else { + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; + } } - return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); + if (isLittleEndian) + return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); + else + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } @@ -5776,21 +6400,24 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, PPCSubTarget); + return LowerVASTART(Op, DAG, Subtarget); case ISD::VAARG: - return LowerVAARG(Op, DAG, PPCSubTarget); + return LowerVAARG(Op, DAG, Subtarget); case ISD::VACOPY: - return LowerVACOPY(Op, DAG, PPCSubTarget); + return LowerVACOPY(Op, DAG, Subtarget); - case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: - return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -5809,6 +6436,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. 
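
Taken together, the vmuleub/vmuloub products plus the endian-corrected shuffle compute an ordinary element-wise, truncating i8 multiply; a scalar model of the end result (illustrative, independent of lane numbering):

#include <cstdint>

static void mulV16i8(const uint8_t A[16], const uint8_t B[16],
                     uint8_t Out[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    uint16_t Prod = uint16_t(A[i]) * uint16_t(B[i]); // full 8x8->16 product
    Out[i] = uint8_t(Prod);                          // keep the low byte
  }
}
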
@@ -5852,7 +6480,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, EVT VT = N->getValueType(0); if (VT == MVT::i64) { - SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); @@ -5914,8 +6542,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -5964,7 +6591,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -5983,8 +6610,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, F->insert(It, loopMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6136,7 +6762,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, - llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible @@ -6160,7 +6786,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI->getOperand(1).getReg(); - if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) @@ -6173,12 +6799,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned BaseReg; if (MF->getFunction()->getAttributes().hasAttribute( AttributeSet::FunctionIndex, Attribute::Naked)) - BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; + BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else - BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; + BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, - TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) + TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); @@ -6202,10 +6828,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, - TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + TII->get(Subtarget.isPPC64() ? 
PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP - if (PPCSubTarget.isPPC64()) { + if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) @@ -6255,7 +6881,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; - unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30; + unsigned BP = (PVT == MVT::i64) ? PPC::X30 : + (Subtarget.isSVR4ABI() && + MF->getTarget().getRelocationModel() == Reloc::PIC_ ? + PPC::R29 : PPC::R30); MachineInstrBuilder MIB; @@ -6317,7 +6946,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC - if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); @@ -6355,10 +6984,16 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8)) { + if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8)) { SmallVector<MachineOperand, 2> Cond; - Cond.push_back(MI->getOperand(4)); + if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8) + Cond.push_back(MI->getOperand(4)); + else + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI->getOperand(1)); DebugLoc dl = MI->getDebugLoc(); @@ -6370,9 +7005,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { - - + MI->getOpcode() == PPC::SELECT_CC_VRRC || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. @@ -6386,23 +7024,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. 
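For reference, a minimal scalar sketch (illustrative only) of the select diamond being assembled here — thisMBB branches over copy0MBB when the condition holds, and sinkMBB joins the two values with a PHI:

    // thisMBB tests the condition and branches to sinkMBB when it is set;
    // copy0MBB is the fallthrough that produces the false value.
    int selectDiamond(bool CondBit, int TrueVal, int FalseVal) {
      int Result = TrueVal;   // value reaching sinkMBB from thisMBB
      if (!CondBit)           // BC/BCC taken when CondBit is set
        Result = FalseVal;    // copy0MBB
      return Result;          // sinkMBB: PHI of the two incoming values
    }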
BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + if (MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8 || + MI->getOpcode() == PPC::SELECT_F4 || + MI->getOpcode() == PPC::SELECT_F8 || + MI->getOpcode() == PPC::SELECT_VRRC) { + BuildMI(BB, dl, TII->get(PPC::BC)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } else { + unsigned SelectPred = MI->getOperand(4).getImm(); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + } // copy0MBB: // %FalseValue = ... @@ -6458,13 +7104,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) - BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) - BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); + BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); + BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); @@ -6504,8 +7150,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: @@ -6556,7 +7201,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI->getOperand(0).getReg(); @@ -6575,8 +7220,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -6725,6 +7369,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { + unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || + MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? + PPC::ANDIo8 : PPC::ANDIo; + bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || + MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 
+ &PPC::GPRCRegClass : + &PPC::G8RCRegClass); + + DebugLoc dl = MI->getDebugLoc(); + BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) + .addReg(MI->getOperand(1).getReg()).addImm(1); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), + MI->getOperand(0).getReg()) + .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6744,9 +7409,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, EVT VT = Op.getValueType(); - if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || - (VT == MVT::f64 && PPCSubTarget.hasFRE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + if ((VT == MVT::f32 && Subtarget.hasFRES()) || + (VT == MVT::f64 && Subtarget.hasFRE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -6759,7 +7425,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, // correct after every iteration. The minimum architected relative // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has // 23 digits and double has 52 digits. - int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; + int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) ++Iterations; @@ -6806,9 +7472,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, EVT VT = Op.getValueType(); - if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || - (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || + (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal sqrt, we need to find the zero of the function: @@ -6821,7 +7488,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, // correct after every iteration. The minimum architected relative // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has // 23 digits and double has 52 digits. - int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; + int Iterations = Subtarget.hasRecipPrec() ? 
1 : 3; if (VT.getScalarType() == MVT::f64) ++Iterations; @@ -6899,8 +7566,8 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const GlobalValue *GV1 = NULL; - const GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); @@ -6938,10 +7605,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { - for (SDNode::op_iterator O = ChainNext->op_begin(), - OE = ChainNext->op_end(); O != OE; ++O) - if (!Visited.count(O->getNode())) - Queue.push_back(O->getNode()); + for (const SDUse &O : ChainNext->ops()) + if (!Visited.count(O.getNode())) + Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } @@ -6979,6 +7645,534 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { return false; } +SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + assert(Subtarget.useCRBits() && + "Expecting to be tracking CR bits"); + // If we're tracking CR bits, we need to be careful that we don't have: + // trunc(binary-ops(zext(x), zext(y))) + // or + // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) + // such that we're unnecessarily moving things into GPRs when it would be + // better to keep them in CR bits. + + // Note that trunc here can be an actual i1 trunc, or can be the effective + // truncation that comes from a setcc or select_cc. + if (N->getOpcode() == ISD::TRUNCATE && + N->getValueType(0) != MVT::i1) + return SDValue(); + + if (N->getOperand(0).getValueType() != MVT::i32 && + N->getOperand(0).getValueType() != MVT::i64) + return SDValue(); + + if (N->getOpcode() == ISD::SETCC || + N->getOpcode() == ISD::SELECT_CC) { + // If we're looking at a comparison, then we need to make sure that the + // high bits (all except for the first) don't matter the result. + ISD::CondCode CC = + cast<CondCodeSDNode>(N->getOperand( + N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); + unsigned OpBits = N->getOperand(0).getValueSizeInBits(); + + if (ISD::isSignedIntSetCC(CC)) { + if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || + DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) + return SDValue(); + } else if (ISD::isUnsignedIntSetCC(CC)) { + if (!DAG.MaskedValueIsZero(N->getOperand(0), + APInt::getHighBitsSet(OpBits, OpBits-1)) || + !DAG.MaskedValueIsZero(N->getOperand(1), + APInt::getHighBitsSet(OpBits, OpBits-1))) + return SDValue(); + } else { + // This is neither a signed nor an unsigned comparison, just make sure + // that the high bits are equal. + APInt Op1Zero, Op1One; + APInt Op2Zero, Op2One; + DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); + DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); + + // We don't really care about what is known about the first bit (if + // anything), so clear it in all masks prior to comparing them. 
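A minimal model of the mask comparison this comment describes, assuming one operand is a zero-extended i1 and the other the constant 0 (the KnownOne masks, handled identically, are omitted):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Op1Zero = 0xFFFFFFFEu; // zext i1: bit 0 unknown, rest known zero
      uint32_t Op2Zero = 0xFFFFFFFFu; // constant 0: every bit known zero
      Op1Zero &= ~1u;                 // clearBit(0)
      Op2Zero &= ~1u;
      assert(Op1Zero == Op2Zero);     // high bits agree; only bit 0 can matter
      return 0;
    }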
+ Op1Zero.clearBit(0); Op1One.clearBit(0); + Op2Zero.clearBit(0); Op2One.clearBit(0); + + if (Op1Zero != Op2Zero || Op1One != Op2One) + return SDValue(); + } + } + + // We now know that the higher-order bits are irrelevant, we just need to + // make sure that all of the intermediate operations are bit operations, and + // all inputs are extensions. + if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC && + N->getOperand(0).getOpcode() != ISD::TRUNCATE && + N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && + N->getOperand(1).getOpcode() != ISD::AND && + N->getOperand(1).getOpcode() != ISD::OR && + N->getOperand(1).getOpcode() != ISD::XOR && + N->getOperand(1).getOpcode() != ISD::SELECT && + N->getOperand(1).getOpcode() != ISD::SELECT_CC && + N->getOperand(1).getOpcode() != ISD::TRUNCATE && + N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && + N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && + N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + + SmallVector<SDValue, 4> Inputs; + SmallVector<SDValue, 8> BinOps, PromOps; + SmallPtrSet<SDNode *, 16> Visited; + + for (unsigned i = 0; i < 2; ++i) { + if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa<ConstantSDNode>(N->getOperand(i))) + Inputs.push_back(N->getOperand(i)); + else + BinOps.push_back(N->getOperand(i)); + + if (N->getOpcode() == ISD::TRUNCATE) + break; + } + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by extensions. + while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. + if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && + BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || + isa<ConstantSDNode>(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || + BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || + BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not an extension or another binary + // operation; we'll abort this transformation. 
+ return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == Inputs[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == Inputs[i] || + User->getOperand(1) == Inputs[i]) + return SDValue(); + } + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == PromOps[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == PromOps[i] || + User->getOperand(1) == PromOps[i]) + return SDValue(); + } + } + } + + // Replace all inputs with the extension operand. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constants may have users outside the cluster of to-be-promoted nodes, + // and so we need to replace those as we do the promotions. + if (isa<ConstantSDNode>(Inputs[i])) + continue; + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + } + + // Replace all operations (these are all the same, but have a different + // (i1) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. Any intermediate truncations or + // extensions disappear. + while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + if (PromOp.getOpcode() == ISD::TRUNCATE || + PromOp.getOpcode() == ISD::SIGN_EXTEND || + PromOp.getOpcode() == ISD::ZERO_EXTEND || + PromOp.getOpcode() == ISD::ANY_EXTEND) { + if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && + PromOp.getOperand(0).getValueType() != MVT::i1) { + // The operand is not yet ready (see comment below). 
+ PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SDValue RepValue = PromOp.getOperand(0); + if (isa<ConstantSDNode>(RepValue)) + RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); + + DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); + continue; + } + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != MVT::i1) || + (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != MVT::i1)) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If there are any constant inputs, make sure they're replaced now. + for (unsigned i = 0; i < 2; ++i) + if (isa<ConstantSDNode>(Ops[C+i])) + Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); + } + + // Now we're left with the initial truncation itself. + if (N->getOpcode() == ISD::TRUNCATE) + return N->getOperand(0); + + // Otherwise, this is a comparison. The operands to be compared have just + // changed type (to i1), but everything else is the same. + return SDValue(N, 0); +} + +SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + + // If we're tracking CR bits, we need to be careful that we don't have: + // zext(binary-ops(trunc(x), trunc(y))) + // or + // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) + // such that we're unnecessarily moving things into CR bits that can more + // efficiently stay in GPRs. Note that if we're not certain that the high + // bits are set as required by the final extension, we still may need to do + // some masking to get the proper behavior. + + // This same functionality is important on PPC64 when dealing with + // 32-to-64-bit extensions; these occur often when 32-bit values are used as + // the return values of functions. Because it is so similar, it is handled + // here as well. + + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return SDValue(); + + if (!((N->getOperand(0).getValueType() == MVT::i1 && + Subtarget.useCRBits()) || + (N->getOperand(0).getValueType() == MVT::i32 && + Subtarget.isPPC64()))) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::AND && + N->getOperand(0).getOpcode() != ISD::OR && + N->getOperand(0).getOpcode() != ISD::XOR && + N->getOperand(0).getOpcode() != ISD::SELECT && + N->getOperand(0).getOpcode() != ISD::SELECT_CC) + return SDValue(); + + SmallVector<SDValue, 4> Inputs; + SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; + SmallPtrSet<SDNode *, 16> Visited; + + // Visit all inputs, collect all binary operations (and, or, xor and + // select) that are all fed by truncations. 
+ while (!BinOps.empty()) { + SDValue BinOp = BinOps.back(); + BinOps.pop_back(); + + if (!Visited.insert(BinOp.getNode())) + continue; + + PromOps.push_back(BinOp); + + for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { + // The condition of the select is not promoted. + if (BinOp.getOpcode() == ISD::SELECT && i == 0) + continue; + if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) + continue; + + if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || + isa<ConstantSDNode>(BinOp.getOperand(i))) { + Inputs.push_back(BinOp.getOperand(i)); + } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || + BinOp.getOperand(i).getOpcode() == ISD::OR || + BinOp.getOperand(i).getOpcode() == ISD::XOR || + BinOp.getOperand(i).getOpcode() == ISD::SELECT || + BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { + BinOps.push_back(BinOp.getOperand(i)); + } else { + // We have an input that is not a truncation or another binary + // operation; we'll abort this transformation. + return SDValue(); + } + } + } + + // Make sure that this is a self-contained cluster of operations (which + // is not quite the same thing as saying that everything has only one + // use). + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), + UE = Inputs[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == Inputs[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == Inputs[i] || + User->getOperand(1) == Inputs[i]) + return SDValue(); + } + } + } + + for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { + for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), + UE = PromOps[i].getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User != N && !Visited.count(User)) + return SDValue(); + + // Make sure that we're not going to promote the non-output-value + // operand(s) or SELECT or SELECT_CC. + // FIXME: Although we could sometimes handle this, and it does occur in + // practice that one of the condition inputs to the select is also one of + // the outputs, we currently can't deal with this. + if (User->getOpcode() == ISD::SELECT) { + if (User->getOperand(0) == PromOps[i]) + return SDValue(); + } else if (User->getOpcode() == ISD::SELECT_CC) { + if (User->getOperand(0) == PromOps[i] || + User->getOperand(1) == PromOps[i]) + return SDValue(); + } + } + } + + unsigned PromBits = N->getOperand(0).getValueSizeInBits(); + bool ReallyNeedsExt = false; + if (N->getOpcode() != ISD::ANY_EXTEND) { + // If all of the inputs are not already sign/zero extended, then + // we'll still need to do that at the end. 
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + unsigned OpBits = + Inputs[i].getOperand(0).getValueSizeInBits(); + assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); + + if ((N->getOpcode() == ISD::ZERO_EXTEND && + !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), + APInt::getHighBitsSet(OpBits, + OpBits-PromBits))) || + (N->getOpcode() == ISD::SIGN_EXTEND && + DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < + (OpBits-(PromBits-1)))) { + ReallyNeedsExt = true; + break; + } + } + } + + // Replace all inputs, either with the truncation operand, or a + // truncation or extension to the final output type. + for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { + // Constant inputs need to be replaced with the to-be-promoted nodes that + // use them because they might have users outside of the cluster of + // promoted nodes. + if (isa<ConstantSDNode>(Inputs[i])) + continue; + + SDValue InSrc = Inputs[i].getOperand(0); + if (Inputs[i].getValueType() == N->getValueType(0)) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); + else if (N->getOpcode() == ISD::SIGN_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); + else + DAG.ReplaceAllUsesOfValueWith(Inputs[i], + DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); + } + + // Replace all operations (these are all the same, but have a different + // (promoted) return type). DAG.getNode will validate that the types of + // a binary operator match, so go through the list in reverse so that + // we've likely promoted both operands first. + while (!PromOps.empty()) { + SDValue PromOp = PromOps.back(); + PromOps.pop_back(); + + unsigned C; + switch (PromOp.getOpcode()) { + default: C = 0; break; + case ISD::SELECT: C = 1; break; + case ISD::SELECT_CC: C = 2; break; + } + + if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && + PromOp.getOperand(C).getValueType() != N->getValueType(0)) || + (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && + PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { + // The to-be-promoted operands of this node have not yet been + // promoted (this should be rare because we're going through the + // list backward, but if one of the operands has several users in + // this cluster of to-be-promoted nodes, it is possible). + PromOps.insert(PromOps.begin(), PromOp); + continue; + } + + SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), + PromOp.getNode()->op_end()); + + // If this node has constant inputs, then they'll need to be promoted here. + for (unsigned i = 0; i < 2; ++i) { + if (!isa<ConstantSDNode>(Ops[C+i])) + continue; + if (Ops[C+i].getValueType() == N->getValueType(0)) + continue; + + if (N->getOpcode() == ISD::SIGN_EXTEND) + Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + else + Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); + } + + DAG.ReplaceAllUsesOfValueWith(PromOp, + DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); + } + + // Now we're left with the initial extension itself. + if (!ReallyNeedsExt) + return N->getOperand(0); + + // To zero extend, just mask off everything except for the first bit (in the + // i1 case). 
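A scalar sketch of the two finishing steps performed just below, assuming a 64-bit result type; PromBits is 1 for the i1 case and 32 for the 32-to-64-bit case:

    #include <cstdint>

    uint64_t finishZExt(uint64_t V, unsigned PromBits) {
      return V & (~0ull >> (64 - PromBits));  // ISD::AND with getLowBitsSet mask
    }
    int64_t finishSExt(uint64_t V, unsigned PromBits) {
      unsigned Sh = 64 - PromBits;            // the ShiftCst below
      return (int64_t)(V << Sh) >> Sh;        // ISD::SHL then ISD::SRA
    }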
+ if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), + DAG.getConstant(APInt::getLowBitsSet( + N->getValueSizeInBits(0), PromBits), + N->getValueType(0))); + + assert(N->getOpcode() == ISD::SIGN_EXTEND && + "Invalid extension type"); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + SDValue ShiftCst = + DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy); + return DAG.getNode(ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), + N->getOperand(0), ShiftCst), ShiftCst); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); @@ -7005,6 +8199,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); } break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return DAGCombineExtBoolTrunc(N, DCI); + case ISD::TRUNCATE: + case ISD::SETCC: + case ISD::SELECT_CC: + return DAGCombineTruncBoolExt(N, DCI); case ISD::FDIV: { assert(TM.Options.UnsafeFPMath && "Reciprocal estimates require UnsafeFPMath"); @@ -7012,7 +8214,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (N->getOperand(1).getOpcode() == ISD::FSQRT) { SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), N->getOperand(0), RV); @@ -7022,7 +8224,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), N->getValueType(0), RV); @@ -7035,7 +8237,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), N->getValueType(0), RV, @@ -7047,7 +8249,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), N->getOperand(0), RV); @@ -7062,12 +8264,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the // reciprocal sqrt. SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode() != 0) { - // Unfortunately, RV is now NaN if the input was exactly 0. Select out - // this case and force the answer to 0. + if (RV.getNode()) { + // Unfortunately, RV is now NaN if the input was exactly 0. Select out + // this case and force the answer to 0. 
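In scalar form, the Newton refinement referred to by the comments in these two combines reduces to the sketch below (assumed shapes, not the vectorized DAG construction). It also shows why the guard mentioned in the comment above is needed: with A == 0 the hardware estimate is infinite, the update mixes 0 and infinity, and the result is NaN, so the lowering selects the answer back to 0.

    // For 1/A, the zero of F(X) = 1/X - A gives X' = X * (2 - A*X); for
    // 1/sqrt(A), F(X) = 1/X^2 - A gives X' = X * (1.5 - 0.5*A*X*X). Each
    // step roughly doubles the number of correct bits, hence one iteration
    // from a 2^-14 estimate versus three from a 2^-5 one (plus one more
    // iteration for double precision).
    double refineRecip(double A, double X, int Iterations) {
      for (int i = 0; i != Iterations; ++i)
        X = X * (2.0 - A * X);
      return X;
    }
    double refineRSqrt(double A, double X, int Iterations) {
      for (int i = 0; i != Iterations; ++i)
        X = X * (1.5 - 0.5 * A * X * X);
      return X;
    }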
EVT VT = RV.getValueType(); @@ -7143,7 +8345,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, - DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + DAG.getVTList(MVT::Other), Ops, cast<StoreSDNode>(N)->getMemoryVT(), cast<StoreSDNode>(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); @@ -7170,8 +8372,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), - Ops, array_lengthof(Ops), - cast<StoreSDNode>(N)->getMemoryVT(), + Ops, cast<StoreSDNode>(N)->getMemoryVT(), cast<StoreSDNode>(N)->getMemOperand()); } break; @@ -7188,6 +8389,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); + bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: @@ -7195,25 +8397,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned - // loads into a alignment-based permutation-control instruction (lvsl), - // a series of regular vector loads (which always truncate their - // input address to an aligned address), and a series of permutations. - // The results of these permutations are the requested loaded values. - // The trick is that the last "extra" load is not taken from the address - // you might suspect (sizeof(vector) bytes after the last requested - // load), but rather sizeof(vector) - 1 bytes after the last - // requested vector. The point of this is to avoid a page fault if the - // base address happend to be aligned. This works because if the base - // address is aligned, then adding less than a full vector length will - // cause the last vector in the sequence to be (re)loaded. Otherwise, - // the next vector will be fetched as you might suspect was necessary. + // loads into an alignment-based permutation-control instruction (lvsl + // or lvsr), a series of regular vector loads (which always truncate + // their input address to an aligned address), and a series of + // permutations. The results of these permutations are the requested + // loaded values. The trick is that the last "extra" load is not taken + // from the address you might suspect (sizeof(vector) bytes after the + // last requested load), but rather sizeof(vector) - 1 bytes after the + // last requested vector. The point of this is to avoid a page fault if + // the base address happened to be aligned. This works because if the + // base address is aligned, then adding less than a full vector length + // will cause the last vector in the sequence to be (re)loaded. + // Otherwise, the next vector will be fetched as you might suspect was + // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. - SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr, - DAG, dl, MVT::v16i8); + Intrinsic::ID Intr = (isLittleEndian ? 
+ Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl); + SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); // Refine the alignment of the original load (a "new" load created here // which was identical to the first except for the alignment would be @@ -7262,8 +8467,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (ExtraLoad.getValueType() != MVT::v4i32) ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); - SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, - BaseLoad, ExtraLoad, PermCntl, DAG, dl); + // Because vperm has a big-endian bias, we must reverse the order + // of the input vectors and complement the permute control vector + // when generating little endian code. We have already handled the + // latter by using lvsr instead of lvsl, so just reverse BaseLoad + // and ExtraLoad here. + SDValue Perm; + if (isLittleEndian) + Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + ExtraLoad, BaseLoad, PermCntl, DAG, dl); + else + Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != MVT::v4i32) Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); @@ -7288,24 +8503,26 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, ++UI; SmallVector<SDValue, 8> Ops; - for (SDNode::op_iterator O = User->op_begin(), - OE = User->op_end(); O != OE; ++O) { - if (*O == Use) + for (const SDUse &O : User->ops()) { + if (O == Use) Ops.push_back(To); else - Ops.push_back(*O); + Ops.push_back(O); } - DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); + DAG.UpdateNodeOperands(User, Ops); } return SDValue(N, 0); } } break; - case ISD::INTRINSIC_WO_CHAIN: - if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == - Intrinsic::ppc_altivec_lvsl && + case ISD::INTRINSIC_WO_CHAIN: { + bool isLittleEndian = Subtarget.isLittleEndian(); + Intrinsic::ID Intr = (isLittleEndian ? + Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl); + if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); @@ -7317,8 +8534,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == - Intrinsic::ppc_altivec_lvsl) { - // We've found another LVSL, and this address if an aligned + Intr) { + // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. @@ -7327,6 +8544,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + } break; case ISD::BSWAP: @@ -7349,7 +8567,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), - Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); + Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -7379,7 +8597,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. 
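Returning to the lvsl/lvx/vperm expansion rewritten above, its byte-level effect in the big-endian case can be modeled as follows (an illustrative sketch; on little-endian, lvsr is used and the two vperm inputs are swapped, as the patch notes):

    #include <cstdint>

    // lvx ignores the low four address bits, so both loads are aligned; the
    // second load is taken at Ptr + 15 (not Ptr + 16) so that an already
    // aligned base re-fetches the same vector instead of touching the next
    // page. vperm then selects the 16 wanted bytes across the pair.
    void unalignedLoadModel(const uint8_t *Ptr, uint8_t Out[16]) {
      uintptr_t P = (uintptr_t)Ptr;
      const uint8_t *Lo = (const uint8_t *)(P & ~(uintptr_t)15);        // lvx Ptr
      const uint8_t *Hi = (const uint8_t *)((P + 15) & ~(uintptr_t)15); // lvx Ptr+15
      unsigned Sh = P & 15;                 // lvsl: starting byte offset
      for (unsigned i = 0; i != 16; ++i)    // vperm byte select
        Out[i] = (Sh + i < 16) ? Lo[Sh + i] : Hi[Sh + i - 16];
    }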
- SDNode *VCMPoNode = 0; + SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); @@ -7400,9 +8618,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. - SDNode *FlagUser = 0; + SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); - FlagUser == 0; ++UI) { + FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { @@ -7420,6 +8638,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; } + case ISD::BRCOND: { + SDValue Cond = N->getOperand(1); + SDValue Target = N->getOperand(2); + + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && + cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() == + Intrinsic::ppc_is_decremented_ctr_nonzero) { + + // We now need to make the intrinsic dead (it cannot be instruction + // selected). + DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); + assert(Cond.getNode()->hasOneUse() && + "Counter decrement has more than one use"); + + return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, + N->getOperand(0), Target); + } + } + break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. This @@ -7488,7 +8725,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getConstant(CompareOpc, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; @@ -7524,11 +8761,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Inline Assembly Support //===----------------------------------------------------------------------===// -void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; @@ -7584,6 +8821,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { // suboptimal. return C_Memory; } + } else if (Constraint == "wc") { // individual CR bits. + return C_RegisterClass; + } else if (Constraint == "wa" || Constraint == "wd" || + Constraint == "wf" || Constraint == "ws") { + return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } @@ -7598,10 +8840,21 @@ PPCTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); + // Look at the constraint type. + if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) + return CW_Register; // an individual CR bit. 
+ else if ((StringRef(constraint) == "wa" || + StringRef(constraint) == "wd" || + StringRef(constraint) == "wf") && + type->isVectorTy()) + return CW_Register; + else if (StringRef(constraint) == "ws" && type->isDoubleTy()) + return CW_Register; + switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); @@ -7639,11 +8892,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 - if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 - if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': @@ -7657,6 +8910,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } + } else if (Constraint == "wc") { // an individual CR bit. + return std::make_pair(0U, &PPC::CRBITRCRegClass); + } else if (Constraint == "wa" || Constraint == "wd" || + Constraint == "wf") { + return std::make_pair(0U, &PPC::VSRCRegClass); + } else if (Constraint == "ws") { + return std::make_pair(0U, &PPC::VSFRCRegClass); } std::pair<unsigned, const TargetRegisterClass*> R = @@ -7668,7 +8928,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. - if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() && + if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) { const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); return std::make_pair(TRI->getMatchingSuperReg(R.first, @@ -7686,7 +8946,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { - SDValue Result(0,0); + SDValue Result; // Only support length 1 constraints. if (Constraint.length() > 1) return; @@ -7792,6 +9052,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -7799,8 +9062,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, // the stack. PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -7850,6 +9113,30 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. 
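The getRegisterByName hook defined just below backs GCC-style global register variables (which reach the backend via llvm.read_register/llvm.write_register); a hypothetical use:

    // GCC/Clang extension: bind a global to a fixed register. r1 is the
    // stack pointer in both PPC ABIs; r2 (TOC) and r13 (thread pointer)
    // are the other names the hook accepts, ABI permitting.
    register unsigned long CurrentSP __asm__("r1");

    unsigned long readStackPointer() { return CurrentSP; }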
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); + + if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || + (!isPPC64 && VT != MVT::i32)) + report_fatal_error("Invalid register global variable type"); + + bool is64Bit = isPPC64 && VT == MVT::i64; + unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("r1", is64Bit ? PPC::X1 : PPC::R1) + .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2)) + .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : + (is64Bit ? PPC::X13 : PPC::R13)) + .Default(0); + + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. @@ -7872,14 +9159,51 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - if (this->PPCSubTarget.isPPC64()) { + if (Subtarget.isPPC64()) { return MVT::i64; } else { return MVT::i32; } } +/// \brief Returns true if it is beneficial to convert a load of a constant +/// to just the constant itself. +bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0 || BitSize > 64) + return false; + return true; +} + +bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + return NumBits1 == 64 && NumBits2 == 32; +} + +bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + return NumBits1 == 64 && NumBits2 == 32; +} + +bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + return isInt<16>(Imm) || isUInt<16>(Imm); +} + +bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { + return isInt<16>(Imm) || isUInt<16>(Imm); +} + bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + unsigned, bool *Fast) const { if (DisablePPCUnaligned) return false; @@ -7893,8 +9217,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; - if (VT.getSimpleVT().isVector()) - return false; + if (VT.getSimpleVT().isVector()) { + if (Subtarget.hasVSX()) { + if (VT != MVT::v2f64 && VT != MVT::v2i64) + return false; + } else { + return false; + } + } if (VT == MVT::ppcf128) return false; @@ -7922,8 +9252,17 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } +bool +PPCTargetLowering::shouldExpandBuildVectorWithShuffles( + EVT VT , unsigned DefinedValues) const { + if (VT == MVT::v2i64) + return false; + + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) + if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index df3af35761ee..c9394dd12e7b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h 
+++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -18,9 +18,8 @@ #include "PPC.h" #include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" -#include "PPCSubtarget.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -71,19 +70,14 @@ namespace llvm { TOC_ENTRY, - /// The following three target-specific nodes are used for calls through + /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. - /// Restore the TOC from the TOC save area of the current stack frame. - /// This is basically a hard coded load instruction which additionally - /// takes/produces a flag. - TOC_RESTORE, - /// Like a regular LOAD but additionally taking/producing a flag. LOAD, - /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is - /// a hard coded load instruction. + /// Like LOAD (taking/producing a flag), but using r2 as hard-coded + /// destination. LOAD_TOC, /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) @@ -121,6 +115,12 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. EH_SJLJ_SETJMP, @@ -177,6 +177,10 @@ namespace llvm { CR6SET, CR6UNSET, + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS + /// on PPC32. + PPC32_GOT, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. @@ -293,25 +297,28 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. - bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + unsigned ShuffleKind, SelectionDAG &DAG); - /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift - /// amount, otherwise return -1. - int isVSLDOIShuffleMask(SDNode *N, bool isUnary); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the + /// shift amount, otherwise return -1. 
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to @@ -324,7 +331,7 @@ namespace llvm { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. - unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); + unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element @@ -333,28 +340,29 @@ namespace llvm { SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); } + class PPCSubtarget; class PPCTargetLowering : public TargetLowering { - const PPCSubtarget &PPCSubTarget; + const PPCSubtarget &Subtarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. - virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it @@ -374,29 +382,31 @@ namespace llvm { bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; - Sched::Preference getSchedulingPreference(SDNode *N) const; + Sched::Preference getSchedulingPreference(SDNode *N) const override; /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
/// - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const override; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; + unsigned getRegisterByName(const char* RegName, EVT VT) const override; - virtual MachineBasicBlock * + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -410,34 +420,58 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType + getConstraintType(const std::string &Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; + AsmOperandInfo &info, const char *constraint) const override; std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty) const; + unsigned getByValTypeAlignment(Type *Ty) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. - virtual void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const override; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + /// isLegalAddImmediate - Return true if the specified immediate is legal + /// add immediate, that is the target has add instructions which can + /// add a register and the immediate without having to materialize + /// the immediate into a register. + bool isLegalAddImmediate(int64_t Imm) const override; + + /// isTruncateFree - Return true if it's free to truncate a value of + /// type Ty1 to type Ty2. 
e.g. On PPC it's free to truncate a i64 value in + /// register X1 to i32 by referencing its sub-register R1. + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + /// \brief Returns true if it is beneficial to convert a load of a constant + /// to just the constant itself. + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -450,25 +484,46 @@ namespace llvm { /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. - virtual EVT + EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const; + MachineFunction &MF) const override; /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + bool allowsUnalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is /// expanded to fmul + fadd. - virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + + // Should we expand the build vector with shuffles? + bool + shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. - virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, - const TargetLibraryInfo *LibInfo) const; + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const override; + + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. + bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { + // We support any array type as "consecutive" block in the parameter + // save area. The element type defines the alignment requirement and + // whether the argument should go in GPRs, FPRs, or VRs if available. + // + // Note that clang uses this capability both to implement the ELFv2 + // homogeneous float/vector aggregate ABI, and to avoid having to use + // "byval" when passing aggregates that might fully fit in registers. 
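      // As an illustrative sketch of the rule implemented just below
      // (hypothetical example, assuming the ELFv2 behavior described above):
      // a C parameter such as "struct { float v[4]; }", whose IR type is the
      // array [4 x float], can travel in four consecutive FPRs instead of
      // being forced through a "byval" copy.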
+ return Ty->isArrayTy(); + } private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; @@ -509,6 +564,9 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -520,6 +578,7 @@ namespace llvm { SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, @@ -538,39 +597,34 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals) const; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; - virtual SDValue + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; - virtual bool + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const; + LLVMContext &Context) const override; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, SDLoc dl) const; - void - setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, - unsigned nAltivecParamsAtEnd, - unsigned MinReservedArea, bool isPPC64) const; - SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -625,6 +679,8 @@ namespace llvm { SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 46db4fe91308..9ed384f56244 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -19,11 +19,13 @@ def s16imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def u16imm64 : Operand<i64> { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = 
"decodeUImmOperand<16>"; } def s17imm64 : Operand<i64> { // This operand type is used for addis/lis to allow the assembler parser @@ -32,14 +34,11 @@ def s17imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); } -def PPCTLSRegOperand : AsmOperandClass { - let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; - let RenderMethod = "addTLSRegOperands"; -} def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; @@ -80,15 +79,22 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. // -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in { - def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, + Requires<[In64BitMode]>; + def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>, Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>, + def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>, Requires<[In64BitMode]>; } } @@ -107,9 +113,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; } } @@ -119,41 +125,58 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", IIC_BrB, []>; // See Pat patterns below. 
def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func), - "bl $func", BrB, []>; + "bl $func", IIC_BrB, []>; def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>; } let Uses = [RM], isCodeGenOnly = 1 in { def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins tlscall:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins abscalltarget:$func), - "bla $func\n\tnop", BrB, + "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>, - Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in { + def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + } } } } // Interpretation64Bit +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let Interpretation64Bit = 1, isAsmParserOnly = 1 in +let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in +def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", IIC_BrB, []>; + // Calls def : Pat<(PPCcall (i64 tglobaladdr:$dst)), (BL8 tglobaladdr:$dst)>; @@ -199,16 +222,16 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), - "ldarx $rD, $ptr", LdStLDARX, + "ldarx $rD, $ptr", IIC_LdStLDARX, [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), - "stdcx. $rS, $dst", LdStSTDCX, + "stdcx. 
$rS, $dst", IIC_LdStSTDCX, [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -225,28 +248,23 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; -let isCodeGenOnly = 1 in { - let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in -def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In64BitMode]>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; - -} } // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), @@ -260,23 +278,23 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in { def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -298,24 +316,24 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { let Uses = [CTR8] in { def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), - "mfctr $rT", SprMFSPR>, + "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in { +let hasSideEffects = 1, Defs = [CTR8] in { let Pattern = [(int_ppc_mtctr i64:$rS)] in def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), - "mfspr $rT, 268", SprMFTB>, + "mfspr $rT, 268", IIC_SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; // Note that encoding mftb using mfspr is now the preferred form, // and has been since at least ISA v2.03. 
The mftb instruction has @@ -329,12 +347,12 @@ def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#D let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR8] in { def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } } // Interpretation64Bit @@ -346,213 +364,236 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. let Interpretation64Bit = 1 in { let neverHasSideEffects = 1 in { +let isCodeGenOnly = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm64SExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. +let isCommutable = 1 in { defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, i64:$rB))]>; +} // isCommutable defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, i64:$rB))]>; defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; +} // isCommutable defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; +} // let isCommutable = 1 // Logical ops with immediate. let Defs = [CR0] in { -def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andis. 
$dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } -def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; -def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; -def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; -def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; +let isCommutable = 1 in defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), - "add $rT, $rA, $rB", IntSimple, + "add $rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; +let isCommutable = 1 in defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; + let Defs = [CARRY] in def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>; def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i64:$rT, (ineg i64:$rA))]>; let Uses = 
[CARRY] in { +let isCommutable = 1 in defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addme", "$rT, $rA", IntGeneral, + "addme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, -1))]>; defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, 0))]>; defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube -1, i64:$rA))]>; defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube 0, i64:$rA))]>; } +} // isCodeGenOnly +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let isAsmParserOnly = 1 in +def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, []>; +let isCommutable = 1 in { defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhd", "$rT, $rA, $rB", IntMulHW, + "mulhd", "$rT, $rA, $rB", IIC_IntMulHW, [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhdu", "$rT, $rA, $rB", IntMulHWU, + "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; +} // isCommutable } } // Interpretation64Bit let isCompare = 1, neverHasSideEffects = 1 in { def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; + "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64; def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; - def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm), - "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; - def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2), - "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; + "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm), + "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64; + def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "cmpldi $dst, $src1, $src2", + IIC_IntCompare>, isPPC64; } let neverHasSideEffects = 1 in { defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "sld", "$rA, $rS, $rB", IntRotateD, + "sld", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srd", "$rA, $rS, $rB", IntRotateD, + "srd", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srad", "$rA, $rS, $rB", IntRotateD, + "srad", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, 
(PPCsra i64:$rS, i32:$rB))]>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; } // Interpretation64Bit // For fast-isel: let isCodeGenOnly = 1 in { def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), - "extsb $rA, $rS", IntSimple, []>, isPPC64; + "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64; def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), - "extsh $rA, $rS", IntSimple, []>, isPPC64; + "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64; } // isCodeGenOnly for fast-isel defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext i32:$rS))]>, isPPC64; defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), - "sradi", "$rA, $rS, $SH", IntRotateDI, + "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), - "cntlzd", "$rA, $rS", IntGeneral, + "cntlzd", "$rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), - "popcntd $rA, $rS", IntGeneral, + "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). 
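// Illustrative values for the note above: with r4 = 0x00000003_0000000F,
// "popcntw 3, 4" yields r3 = 0x00000002_00000004; each 32-bit word of the
// source is counted independently, and only the low-word result is used here.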
def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), - "popcntw $rA, $rS", IntGeneral, + "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IntDivD, + "divd", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IntDivD, + "divdu", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; +let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulld", "$rT, $rA, $rB", IntMulHD, + "mulld", "$rT, $rA, $rB", IIC_IntMulHD, [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>; } @@ -560,7 +601,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -568,43 +609,53 @@ defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), // Rotate instructions. defm RLDCL : MDSForm_1r<30, 8, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDCR : MDSForm_1r<30, 9, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // For fast-isel: let isCodeGenOnly = 1 in def RLDICL_32_64 : MDForm_1<30, 0, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; defm RLDIC : MDForm_1r<30, 2, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>; +let isCommutable = 1 in { +// RLWIMI can be commuted if the rotate amount is zero. 
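// (Why the zero-rotate form commutes: with SH=0, "rlwimi rA, rS, 0, MB, ME"
// computes (rS & mask) | (rA & ~mask), and exchanging the two register
// operands while complementing the mask range produces the same value.)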
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; +} + let isSelect = 1 in def ISEL8 : AForm_4<31, 15, (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } // Interpretation64Bit } // neverHasSideEffects = 1 @@ -618,111 +669,111 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, + "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, + "lwax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1 in { def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, []>, isPPC64, + "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64, PPC970_DGroup_Cracked; def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, []>, isPPC64, + "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64, PPC970_DGroup_Cracked; } // end fast-isel isCodeGenOnly // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLHAU, + "lwaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } } -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Zero extending loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, + "lhzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -733,7 +784,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), // Full 8-byte loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), - "ld $rD, $src", LdStLD, + "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. 
// Otherwise, we need to create two instructions to form a 32-bit offset, @@ -751,33 +802,27 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1, isCodeGenOnly = 1 in { -let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", LdStLD, - [(PPCload_toc i64:$reg)]>, isPPC64; - -let RST = 2, DS = 10, RA = 1 in -def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", LdStLD, - [(PPCtoc_restore)]>, isPPC64; -} +let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in +def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src), + "ld 2, $src", IIC_LdStLD, + [(PPCload_toc ixaddr:$src)]>, isPPC64; + def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), - "ldx $rD, $src", LdStLD, + "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), - "ldbrx $rD, $src", LdStLoad, + "ldbrx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; let mayLoad = 1, neverHasSideEffects = 1 in { def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLDU, + "ldu $rD, $addr", IIC_LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLDU, + "ldux $rD, $addr", IIC_LdStLDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -860,78 +905,79 @@ def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; let PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; } // Interpretation64Bit // Normal 8-byte stores. 
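// A note on the aligned4load/aligned4store predicates paired with DS-form
// instructions such as LD above and STD below: the DS encoding drops the low
// two bits of the 16-bit displacement, so the offset must be a multiple of 4,
// which is why these patterns require 4-byte-aligned accesses via ixaddr.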
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), - "std $rS, $dst", LdStSTD, + "std $rS, $dst", IIC_LdStSTD, [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), - "stdx $rS, $dst", LdStSTD, + "stdx $rS, $dst", IIC_LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), - "stdbrx $rS, $dst", LdStStore, + "stdbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } // Stores with Update (pre-inc). let PPC970_Unit = 2, mayStore = 1 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), - "stdu $rS, $dst", LdStSTDU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } // Interpretation64Bit +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), + "stdu $rS, $dst", IIC_LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTDU, []>, + "stdux $rS, $dst", IIC_LdStSTDUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; } @@ -966,29 +1012,29 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), let PPC970_Unit = 3, neverHasSideEffects = 1, Uses = [RM] in { // FPU Operations. 
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfid", "$frD, $frB", FPGeneral, + "fcfid", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), - "fctid", "$frD, $frB", FPGeneral, + "fctid", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), - "fctidz", "$frD, $frB", FPGeneral, + "fctidz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfidu", "$frD, $frB", FPGeneral, + "fcfidu", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfids", "$frD, $frB", FPGeneral, + "fcfids", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfidus", "$frD, $frB", FPGeneral, + "fcfidus", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiduz", "$frD, $frB", FPGeneral, + "fctiduz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwuz", "$frD, $frB", FPGeneral, + "fctiwuz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } @@ -1006,6 +1052,14 @@ def : Pat<(i64 (anyext i32:$in)), def : Pat<(i32 (trunc i64:$in)), (EXTRACT_SUBREG $in, sub_32)>; +// Implement the 'not' operation with the NOR instruction. +// (we could use the default xori pattern, but nor has lower latency on some +// cores (such as the A2)). +def i64not : OutPatFrag<(ops node:$in), + (NOR8 $in, $in)>; +def : Pat<(not i64:$in), + (i64not $in)>; + // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), (LBZ8 iaddr:$src)>; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index a55abe373556..b271b5d5aa21 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -22,111 +22,160 @@ def vnot_ppc : PatFrag<(ops node:$in), def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. 
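// As far as these fragments show, the new integer argument to the
// isVPKU*ShuffleMask predicates selects the match kind: 0 for the normal
// two-input form, 1 for the unary (same-input) form, and 2 for the
// little-endian form in which the two inputs are swapped.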
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; +def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); }]>; def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); }]>; def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); }]>; def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); }]>; def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); }]>; def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); }]>; def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); }]>; def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); }]>; def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); }]>; def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); }]>; def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); }]>; def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); 
+}]>; + + +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. +def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); +}]>; +def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); }]>; def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, false)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG)); }]>; def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, false) != -1; + return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1; }], VSLDOI_get_imm>; /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag combiner. def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, true)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG)); }]>; def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, true) != -1; + return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1; }], VSLDOI_unary_get_imm>; +/// VSLDOI_swapped* - These fragments are provided for little-endian, where +/// the inputs must be swapped for correct semantics. +def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG)); +}]>; +def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1; +}], VSLDOI_get_imm>; + + // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. 
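// For example, "vspltb vD, vB, 5" replicates byte element 5 of vB into all
// sixteen byte lanes of vD; the *_get_imm xforms below recover that element
// index from the canonical 16-entry byte shuffle mask. As a purely
// illustrative sketch (hypothetical helper, ignoring the endian handling the
// real getVSPLTImmediate does via its SelectionDAG argument):
//
//   static unsigned getSplatImm(ArrayRef<int> Mask, unsigned EltSize) {
//     // For a splat, Mask[0] names the first byte of the splatted element.
//     return Mask[0] / EltSize;
//   }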
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 1)); + return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG)); }]>; def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); }], VSPLTB_get_imm>; def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 2)); + return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG)); }]>; def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); }], VSPLTH_get_imm>; def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 4)); + return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG)); }]>; def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ @@ -164,7 +213,7 @@ def vecspltisw : PatLeaf<(build_vector), [{ // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; // VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the @@ -172,7 +221,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; // VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two @@ -180,14 +229,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), - !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. 
class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - !strconcat(opc, " $vD, $vA, $vB"), VecFP, + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; // VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the @@ -195,7 +244,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - !strconcat(opc, " $vD, $vA, $vB"), VecFP, + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; // VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two @@ -203,13 +252,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - !strconcat(opc, " $vD, $vA, $vB"), VecFP, + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), - !strconcat(opc, " $vD, $vB"), VecFP, + !strconcat(opc, " $vD, $vB"), IIC_VecFP, [(set v4f32:$vD, (IntID v4f32:$vB))]>; // VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the @@ -217,128 +266,130 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), - !strconcat(opc, " $vD, $vB"), VecFP, + !strconcat(opc, " $vD, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. 
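// Note that the HasAltivec predicate string below changes from
// "PPCSubTarget." to "PPCSubTarget->", consistent with the instruction
// selector now holding its subtarget as a pointer rather than a reference.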
-def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">; let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStLoad /*FIXME*/, []>, + "dss $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStLoad /*FIXME*/, []>, + "dssall", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated<DeprecatedDST>; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), - "mfvscr $vD", LdStStore, + "mfvscr $vD", IIC_LdStStore, [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), - "mtvscr $vB", LdStLoad, + "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStLoad, + "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStLoad, + "lvehx $vD, $src", IIC_LdStLoad, [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStLoad, + "lvewx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStLoad, + "lvx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStLoad, + "lvxl $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStLoad, + "lvsl $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStLoad, + "lvsr $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStStore, + "stvebx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStStore, + "stvehx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStStore, + "stvewx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), - "stvx $rS, $dst", LdStStore, + "stvx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStStore, + "stvxl $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. +let isCommutable = 1 in { def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vmaddfp $vD, $vA, $vC, $vB", VecFP, + "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; // FIXME: The fma+fneg pattern won't match because fneg is not legal. def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, - (fneg v4f32:$vB))))]>; + (fneg v4f32:$vB))))]>; def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, v8i16>; def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; +} // isCommutable def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, v4i32, v4i32, v16i8>; @@ -346,23 +397,24 @@ def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. 
def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH), - "vsldoi $vD, $vA, $vB, $SH", VecFP, + "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP, [(set v16i8:$vD, (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. +let isCommutable = 1 in { def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddfp $vD, $vA, $vB", VecFP, + "vaddfp $vD, $vA, $vB", IIC_VecFP, [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddubm $vD, $vA, $vB", VecGeneral, + "vaddubm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduhm $vD, $vA, $vB", VecGeneral, + "vadduhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduwm $vD, $vA, $vB", VecGeneral, + "vadduwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; @@ -372,30 +424,31 @@ def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>; def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; - - +} // isCommutable + +let isCommutable = 1 in def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vand $vD, $vA, $vB", VecFP, + "vand $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vandc $vD, $vA, $vB", VecFP, + "vandc $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfsx $vD, $vB, $UIMM", VecFP, + "vcfsx $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfux $vD, $vB, $UIMM", VecFP, + "vcfux $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctsxs $vD, $vB, $UIMM", VecFP, + "vctsxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctuxs $vD, $vB, $UIMM", VecFP, + "vctuxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; @@ -404,25 +457,26 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), // to floating-point (sint_to_fp/uint_to_fp) conversions. 
let isCodeGenOnly = 1, VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfsx $vD, $vB, 0", VecFP, + "vcfsx $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), - "vctuxs $vD, $vB, 0", VecFP, + "vctuxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfux $vD, $vB, 0", VecFP, + "vcfux $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), - "vctsxs $vD, $vB, 0", VecFP, + "vctsxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; +let isCommutable = 1 in { def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>; def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; @@ -444,24 +498,25 @@ def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; +} // isCommutable def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghb $vD, $vA, $vB", VecFP, + "vmrghb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghh $vD, $vA, $vB", VecFP, + "vmrghh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghw $vD, $vA, $vB", VecFP, + "vmrghw $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglb $vD, $vA, $vB", VecFP, + "vmrglb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglh $vD, $vA, $vB", VecFP, + "vmrglh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglw $vD, $vA, $vB", VecFP, + "vmrglw $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, @@ -477,6 +532,7 @@ def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, v4i32, v8i16, v4i32>; +let isCommutable = 1 in { def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, v8i16, v16i8>; def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, @@ -493,6 +549,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, v8i16, v16i8>; def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, v4i32, v8i16>; +} // isCommutable def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; @@ -504,16 +561,16 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; def VSUBCUW : 
VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubfp $vD, $vA, $vB", VecGeneral, + "vsubfp $vD, $vA, $vB", IIC_VecGeneral, [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsububm $vD, $vA, $vB", VecGeneral, + "vsububm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuhm $vD, $vA, $vB", VecGeneral, + "vsubuhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuwm $vD, $vA, $vB", VecGeneral, + "vsubuwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; @@ -534,15 +591,17 @@ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vnor $vD, $vA, $vB", VecFP, + "vnor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, v4i32:$vB)))]>; +let isCommutable = 1 in { def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vor $vD, $vA, $vB", VecFP, + "vor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vxor $vD, $vA, $vB", VecFP, + "vxor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; +} // isCommutable def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; @@ -556,15 +615,15 @@ def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vspltb $vD, $vB, $UIMM", VecPerm, + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vsplth $vD, $vB, $UIMM", VecPerm, + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vspltw $vD, $vB, $UIMM", VecPerm, + "vspltw $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; @@ -580,13 +639,13 @@ def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisb $vD, $SIMM", VecPerm, + "vspltisb $vD, $SIMM", IIC_VecPerm, [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltish $vD, $SIMM", VecPerm, + "vspltish $vD, $SIMM", IIC_VecPerm, [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisw $vD, $SIMM", VecPerm, + "vspltisw $vD, $SIMM", IIC_VecPerm, [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. 
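The vsplt{b,h,w} definitions above encode the splatted element index in UIMM, and the VSPLT*_get_imm transforms near the top of this file now receive *CurDAG so the index can account for the target's byte order. A standalone C++ sketch of that computation (illustrative only; getSplatImm is a made-up name, not the LLVM helper):

    #include <array>
    #include <cstdio>

    // For a 16-byte shuffle mask that replicates one EltSize-byte element,
    // return the element index that vspltb/vsplth/vspltw would encode as
    // UIMM, or -1 if the mask is not a splat. The instructions number
    // elements from the big-endian end of the register, so a little-endian
    // target mirrors the index.
    static int getSplatImm(const std::array<int, 16> &Mask, int EltSize,
                           bool IsLittleEndian) {
      int Elt = Mask[0] / EltSize;
      for (int i = 0; i < 16; ++i)
        if (Mask[i] != Elt * EltSize + i % EltSize)
          return -1;
      return IsLittleEndian ? (16 / EltSize) - 1 - Elt : Elt;
    }

    int main() {
      std::array<int, 16> Mask;
      for (int i = 0; i < 16; ++i)
        Mask[i] = 4 + i % 4;                // byte pattern of 32-bit element 1
      std::printf("%d\n", getSplatImm(Mask, 4, false)); // prints 1
      std::printf("%d\n", getSplatImm(Mask, 4, true));  // prints 2
      return 0;
    }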
@@ -601,13 +660,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuhum $vD, $vA, $vB", VecFP, + "vpkuhum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuwum $vD, $vA, $vB", VecFP, + "vpkuwum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, @@ -631,10 +690,12 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, + IIC_VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr, + IIC_VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; @@ -676,24 +737,24 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllZerosV))]>; def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllZerosV))]>; def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllOnesV))]>; def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllOnesV))]>; def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } @@ -761,6 +822,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), (VPKUHUM $vA, $vA)>; +// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands. +// These fragments are matched for little-endian, where the inputs must +// be swapped for correct semantics. +def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB), + (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>; +def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUWUM $vB, $vA)>; +def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUHUM $vB, $vA)>; + // Match vmrg*(x,x) def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), (VMRGLB $vA, $vA)>; @@ -775,6 +846,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), (VMRGHW $vA, $vA)>; +// Match vmrg*(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be +// swapped for correct semantics. 
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLB $vB, $vA)>; +def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLH $vB, $vA)>; +def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLW $vB, $vA)>; +def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHB $vB, $vA)>; +def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHH $vB, $vA)>; +def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHW $vB, $vA)>; + // Logical Operations def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 29233d49148d..1e4396cd1017 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -14,6 +14,8 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<32> Inst; + field bits<32> SoftFail = 0; + let Size = 4; bit PPC64 = 0; // Default value, override with isPPC64 @@ -67,6 +69,8 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<64> Inst; + field bits<64> SoftFail = 0; + let Size = 8; bit PPC64 = 0; // Default value, override with isPPC64 @@ -109,7 +113,7 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, // 1.7.2 B-Form class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I<opcode, OOL, IOL, asmstr, BrB> { + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { bits<7> BIBO; // 2 bits of BI and 5 bits of BO. bits<3> CR; bits<14> BD; @@ -135,7 +139,7 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I<opcode, OOL, IOL, asmstr, BrB> { + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { bits<14> BD; let Inst{6-10} = bo; @@ -147,7 +151,7 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, class BForm_3<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I<opcode, OOL, IOL, asmstr, BrB> { + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { bits<5> BO; bits<5> BI; bits<14> BD; @@ -159,6 +163,19 @@ class BForm_3<bits<6> opcode, bit aa, bit lk, let Inst{31} = lk; } +class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.3 SC-Form class SCForm<bits<6> opcode, bits<1> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, @@ -258,6 +275,15 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> { + let A = R; + let B = R; + let C = 0; +} + class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -334,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, let Inst{30-31} = xo; } -class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { - bits<5> RST; - bits<14> DS; - bits<5> RA; - - let Pattern = pattern; - - let Inst{6-10} = RST; - let 
Inst{11-15} = RA; - let Inst{16-29} = DS; - let Inst{30-31} = xo; -} // 1.7.6 X-Form class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, @@ -567,6 +579,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let A = 0; } +// XX*-Form (VSX) +class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = XT{5}; +} + +class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> CR; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-13} = 0; + let Inst{14-15} = D; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> CR; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-10} = 0; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = 0; + let Inst{22-23} = D; + let Inst{24-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = RC; + let Inst{22-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} 
= XB{5}; + let Inst{31} = XT{5}; +} + +class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-25} = XC{4-0}; + let Inst{26-27} = xo; + let Inst{28} = XC{5}; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + // DCB_Form - Form X instruction, used for dcb* instructions. class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -664,6 +843,12 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk, let BH = 0; } +class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> { + let BO = bo; + let BH = 0; +} class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 80bc27a95765..9bac91d7d412 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,26 +18,32 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "ppc-instr-info" + #define GET_INSTRMAP_INFO #define GET_INSTRINFO_CTOR_DTOR #include "PPCGenInstrInfo.inc" -using namespace llvm; - static cl:: opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); @@ -45,26 +51,35 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden); +static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", +cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden); + +static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", +cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), +cl::Hidden); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} -PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) - : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - TM(tm), RI(*TM.getSubtargetImpl()) {} +PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) + : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + Subtarget(STI), RI(STI) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. 
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( - const TargetMachine *TM, - const ScheduleDAG *DAG) const { - unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); +ScheduleHazardRecognizer * +PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { + unsigned Directive = + static_cast<const PPCSubtarget *>(STI)->getDarwinDirective(); if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { - const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCScoreboardHazardRecognizer(II, DAG); + const InstrItineraryData *II = + &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); + return new ScoreboardHazardRecognizer(II, DAG); } - return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer @@ -72,17 +87,72 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { - unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + unsigned Directive = + DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) + return new PPCDispatchGroupSBHazardRecognizer(II, DAG); // Most subtargets use a PPC970 recognizer. if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { - assert(TM.getInstrInfo() && "No InstrInfo?"); + assert(DAG->TII && "No InstrInfo?"); + + return new PPCHazardRecognizer970(*DAG); + } + + return new ScoreboardHazardRecognizer(II, DAG); +} + + +int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const { + int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, + UseMI, UseIdx); + + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + unsigned Reg = DefMO.getReg(); + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool IsRegCR; + if (TRI->isVirtualRegister(Reg)) { + const MachineRegisterInfo *MRI = + &DefMI->getParent()->getParent()->getRegInfo(); + IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || + MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass); + } else { + IsRegCR = PPC::CRRCRegClass.contains(Reg) || + PPC::CRBITRCRegClass.contains(Reg); + } - return new PPCHazardRecognizer970(TM); + if (UseMI->isBranch() && IsRegCR) { + if (Latency < 0) + Latency = getInstrLatency(ItinData, DefMI); + + // On some cores, there is an additional delay between writing to a condition + // register, and using it from a branch. + unsigned Directive = Subtarget.getDarwinDirective(); + switch (Directive) { + default: break; + case PPC::DIR_7400: + case PPC::DIR_750: + case PPC::DIR_970: + case PPC::DIR_E5500: + case PPC::DIR_PWR4: + case PPC::DIR_PWR5: + case PPC::DIR_PWR5X: + case PPC::DIR_PWR6: + case PPC::DIR_PWR6X: + case PPC::DIR_PWR7: + case PPC::DIR_PWR8: + Latency += 2; + break; + } } - return new PPCScoreboardHazardRecognizer(II, DAG); + return Latency; } // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. 
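The getOperandLatency override added above folds an extra delay into the def-to-use latency when a condition-register write feeds a branch. A minimal standalone model of that adjustment (plain C++; InstrModel and operandLatency are stand-in names, not LLVM types):

    #include <cstdio>

    struct InstrModel {
      bool WritesCR;    // defines a CR field or CR bit
      bool IsBranch;
      int BaseLatency;  // base def-to-use latency from the itinerary
    };

    // On cores listed in the switch above (7400, 750, 970, e5500, and
    // PWR4 through PWR8), a branch reading a freshly written condition
    // register pays two extra cycles on top of the base latency.
    static int operandLatency(const InstrModel &Def, const InstrModel &Use,
                              bool CoreHasCRToBranchDelay) {
      int Latency = Def.BaseLatency;
      if (Def.WritesCR && Use.IsBranch && CoreHasCRToBranchDelay)
        Latency += 2;
      return Latency;
    }

    int main() {
      InstrModel Cmp{true, false, 1};  // e.g. cmpw, writes CR0
      InstrModel Bcc{false, true, 1};  // conditional branch reading CR0
      std::printf("%d\n", operandLatency(Cmp, Bcc, true));  // prints 3
      std::printf("%d\n", operandLatency(Cmp, Bcc, false)); // prints 1
      return 0;
    }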
@@ -110,7 +180,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::LFS: case PPC::LFD: case PPC::RESTORE_CR: + case PPC::RESTORE_CRBIT: case PPC::LVX: + case PPC::LXVD2X: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -134,7 +206,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::STFS: case PPC::STFD: case PPC::SPILL_CR: + case PPC::SPILL_CRBIT: case PPC::STVX: + case PPC::STXVD2X: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -156,12 +230,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI && - MI->getOpcode() != PPC::RLWIMIo) + MI->getOpcode() != PPC::RLWIMIo && + MI->getOpcode() != PPC::RLWIMI8 && + MI->getOpcode() != PPC::RLWIMI8o) return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. if (MI->getOperand(3).getImm() != 0) - return 0; + return nullptr; // If we have a zero rotate count, we have: // M = mask(MB,ME) @@ -174,6 +250,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); unsigned Reg2 = MI->getOperand(2).getReg(); + unsigned SubReg1 = MI->getOperand(1).getSubReg(); + unsigned SubReg2 = MI->getOperand(2).getSubReg(); bool Reg1IsKill = MI->getOperand(1).isKill(); bool Reg2IsKill = MI->getOperand(2).isKill(); bool ChangeReg0 = false; @@ -183,6 +261,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Must be two address instruction! assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); + assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch"); Reg2IsKill = false; ChangeReg0 = true; } @@ -203,10 +282,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { .addImm((MB-1) & 31); } - if (ChangeReg0) + if (ChangeReg0) { MI->getOperand(0).setReg(Reg2); + MI->getOperand(0).setSubReg(SubReg2); + } MI->getOperand(2).setReg(Reg1); MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setSubReg(SubReg1); + MI->getOperand(1).setSubReg(SubReg2); MI->getOperand(2).setIsKill(Reg1IsKill); MI->getOperand(1).setIsKill(Reg2IsKill); @@ -216,13 +299,38 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } +bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + // For VSX A-Type FMA instructions, it is the first two operands that can be + // commuted, however, because the non-encoded tied input operand is listed + // first, the operands to swap are actually the second and third. + + int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); + if (AltOpc == -1) + return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + + SrcOpIdx1 = 2; + SrcOpIdx2 = 3; + return true; +} + void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { + // This function is used for scheduling, and the nop wanted here is the type + // that terminates dispatch groups on the POWER cores. 
+ unsigned Directive = Subtarget.getDarwinDirective(); + unsigned Opcode; + switch (Directive) { + default: Opcode = PPC::NOP; break; + case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; + case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; + case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ + } + DebugLoc DL; - BuildMI(MBB, MI, DL, get(PPC::NOP)); + BuildMI(MBB, MI, DL, get(Opcode)); } - // Branch analysis. // Note: If the condition register is set to CTR or CTR8 then this is a // BDNZ (imm == 1) or BDZ (imm == 0) branch. @@ -230,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); @@ -263,6 +371,22 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; + } else if (LastInst->getOpcode() == PPC::BC) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(LastInst->getOperand(0)); + return false; + } else if (LastInst->getOpcode() == PPC::BCn) { + if (!LastInst->getOperand(1).isMBB()) + return true; + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + Cond.push_back(LastInst->getOperand(0)); + return false; } else if (LastInst->getOpcode() == PPC::BDNZ8 || LastInst->getOpcode() == PPC::BDNZ) { if (!LastInst->getOperand(0).isMBB()) @@ -310,6 +434,26 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; + } else if (SecondLastInst->getOpcode() == PPC::BC && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (SecondLastInst->getOpcode() == PPC::BCn && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(1).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET)); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || SecondLastInst->getOpcode() == PPC::BDNZ) && LastInst->getOpcode() == PPC::B) { @@ -367,6 +511,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { --I; } if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 0; @@ -379,6 +524,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 
if (I == MBB.begin()) return 1; --I; if (I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn && I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 1; @@ -398,19 +544,23 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "PPC branch conditions have two components!"); - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // One-way branch. - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); return 1; } @@ -419,9 +569,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(Cond[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_SET) + BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB); + else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET) + BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB); else BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } @@ -431,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { - if (!TM.getSubtargetImpl()->hasISEL()) + if (!Subtarget.hasISEL()) return false; if (Cond.size() != 2) @@ -475,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, assert(Cond.size() == 2 && "PPC branch conditions have two components!"); - assert(TM.getSubtargetImpl()->hasISEL() && + assert(Subtarget.hasISEL() && "Cannot insert select on target without ISEL support"); // Get the register classes. @@ -506,6 +660,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break; + case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break; } unsigned FirstReg = SwapOps ? FalseReg : TrueReg, @@ -534,6 +690,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + // We can end up with self copies and similar things as a result of VSX copy + // legalization. Promote them here. 
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); + if (PPC::F8RCRegClass.contains(DestReg) && + PPC::VSLRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && SrcReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + DestReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(DestReg) && + PPC::VSHRCRegClass.contains(SrcReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && SrcReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + DestReg = SuperReg; + } else if (PPC::F8RCRegClass.contains(SrcReg) && + PPC::VSLRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && DestReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + SrcReg = SuperReg; + } else if (PPC::VRRCRegClass.contains(SrcReg) && + PPC::VSHRCRegClass.contains(DestReg)) { + unsigned SuperReg = + TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass); + + if (VSXSelfCopyCrash && DestReg == SuperReg) + llvm_unreachable("nop VSX copy"); + + SrcReg = SuperReg; + } + unsigned Opc; if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::OR; @@ -545,6 +742,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::MCRF; else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::VOR; + else if (PPC::VSRCRegClass.contains(DestReg, SrcReg)) + // There are two different ways this can be done: + // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only + // issue in VSU pipeline 0. + // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but + // can go to either pipeline. + // We'll always use xxlor here, because in practically all cases where + // copies are generated, they are close enough to some use that the + // lower-latency form is preferable. + Opc = PPC::XXLOR; + else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::XXLORf; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -599,47 +808,31 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - // FIXME: We use CRi here because there is no mtcrf on a bit. Since the - // backend currently only uses CR1EQ as an individual bit, this should - // not cause any bug. If we need other uses of CR bits, the following - // code may be invalid. 
- unsigned Reg = 0; - if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || - SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || - SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || - SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || - SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || - SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || - SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || - SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || - SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) - Reg = PPC::CR7; - - return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - assert(TM.getSubtargetImpl()->isDarwin() && + assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) .addReg(SrcReg, @@ -717,42 +910,24 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { - - unsigned Reg = 0; - if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || - DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) - Reg = PPC::CR0; - else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || - DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) - Reg = PPC::CR1; - else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || - DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) - Reg = PPC::CR2; - else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || - DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) - Reg = PPC::CR3; - else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || - DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) - Reg = PPC::CR4; - else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || - DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) - Reg = PPC::CR5; - else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || - DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) - Reg = PPC::CR6; - else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || - DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) - Reg = PPC::CR7; - - return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CRBIT), DestReg), + 
FrameIdx)); + return true; } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), FrameIdx)); NonRI = true; + } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg), + FrameIdx)); + NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - assert(TM.getSubtargetImpl()->isDarwin() && + assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::RESTORE_VRSAVE), @@ -866,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned ZeroReg; if (UseInfo->isLookupPtrRegClass()) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; } else { ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? @@ -933,13 +1108,21 @@ bool PPCInstrInfo::PredicateInstruction( unsigned OpC = MI->getOpcode(); if (OpC == PPC::BLR) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); - } else { + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI->setDesc(get(PPC::BCLR)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(PPC::BCLRn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + } else { + MI->setDesc(get(PPC::BCCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); } @@ -947,10 +1130,26 @@ bool PPCInstrInfo::PredicateInstruction( return true; } else if (OpC == PPC::B) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); + } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCn)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()) + .addMBB(MBB); } else { MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); MI->RemoveOperand(0); @@ -969,9 +1168,24 @@ bool PPCInstrInfo::PredicateInstruction( llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); - MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : - (setLR ? PPC::BCCTRL : PPC::BCCTR))); + bool isPPC64 = Subtarget.isPPC64(); + + if (Pred[0].getImm() == PPC::PRED_BIT_SET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : + (setLR ? 
PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) { + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) : + (setLR ? PPC::BCCTRLn : PPC::BCCTRn))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(Pred[1].getReg()); + return true; + } + + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) : + (setLR ? PPC::BCCCTRL : PPC::BCCCTR))); MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(Pred[0].getImm()) .addReg(Pred[1].getReg()); @@ -1115,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // for equality checks (as those don't depend on the sign). On PPC64, // we are restricted to equality for unsigned 64-bit comparisons and for // signed 32-bit comparisons the applicability is more restricted. - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; @@ -1156,8 +1370,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, if (equalityOnly) { // We need to check the uses of the condition register in order to reject // non-equality comparisons. - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), - IE = MRI->use_end(); I != IE; ++I) { + for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg), + IE = MRI->use_instr_end(); I != IE; ++I) { MachineInstr *UseMI = &*I; if (UseMI->getOpcode() == PPC::BCC) { unsigned Pred = UseMI->getOperand(0).getImm(); @@ -1179,8 +1393,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); I != EL; ++I) { bool FoundUse = false; - for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg), - JE = MRI->use_end(); J != JE; ++J) + for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg), + JE = MRI->use_instr_end(); J != JE; ++J) if (&*J == &*I) { FoundUse = true; break; @@ -1193,10 +1407,10 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; // FIXME: Conservatively refuse to convert an instruction which isn't in the // same BB as the comparison. 
This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
@@ -1289,15 +1503,16 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
  }

  if (ShouldSwap)
-    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
-         IE = MRI->use_end(); I != IE; ++I) {
+    for (MachineRegisterInfo::use_instr_iterator
+         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
        assert((!equalityOnly || Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
               "Invalid predicate for equality-only optimization");
-        PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+        PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
                                PPC::getSwappedPredicate(Pred)));
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
@@ -1310,7 +1525,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
        else if (NewSubReg == PPC::sub_gt)
          NewSubReg = PPC::sub_lt;

-        SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+        SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
                                  NewSubReg));
      } else // We need to abort on a user we don't understand.
        return false;
@@ -1322,7 +1537,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
  CmpInstr->eraseFromParent();

  MachineBasicBlock::iterator MII = MI;
-  BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+  BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
          get(TargetOpcode::COPY), CRReg)
    .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
@@ -1367,26 +1582,508 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
/// instruction may be. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
-  switch (MI->getOpcode()) {
-  case PPC::INLINEASM: { // Inline Asm: Variable size.
+  unsigned Opcode = MI->getOpcode();
+
+  if (Opcode == PPC::INLINEASM) {
    const MachineFunction *MF = MI->getParent()->getParent();
    const char *AsmStr = MI->getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
-  }
-  case PPC::PROLOG_LABEL:
-  case PPC::EH_LABEL:
-  case PPC::GC_LABEL:
-  case PPC::DBG_VALUE:
-    return 0;
-  case PPC::BL8_NOP:
-  case PPC::BLA8_NOP:
-    return 8;
-  default:
-    return 4; // PowerPC instructions are all 4 bytes
+  } else {
+    const MCInstrDesc &Desc = get(Opcode);
+    return Desc.getSize();
  }
}

#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace {
+  // PPCVSXFMAMutate pass - Mutate the addend-killing (A-type) VSX FMA
+  // instructions into their M-type forms when one of the product operands
+  // is killed, which lets the copy feeding the addend be removed.
+ struct PPCVSXFMAMutate : public MachineFunctionPass { + static char ID; + PPCVSXFMAMutate() : MachineFunctionPass(ID) { + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + } + + LiveIntervals *LIS; + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) { + MachineInstr *MI = I; + + // The default (A-type) VSX FMA form kills the addend (it is taken from + // the target register, which is then updated to reflect the result of + // the FMA). If the instruction, however, kills one of the registers + // used for the product, then we can use the M-form instruction (which + // will take that value from the to-be-defined register). + + int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode()); + if (AltOpc == -1) + continue; + + // This pass is run after register coalescing, and so we're looking for + // a situation like this: + // ... + // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 + // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, + // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 + // ... + // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19, + // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19 + // ... + // Where we can eliminate the copy by changing from the A-type to the + // M-type instruction. Specifically, for this example, this means: + // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16, + // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16 + // is replaced by: + // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg17, %vreg9, + // %RM<imp-use>; VSLRC:%vreg16,%vreg17,%vreg9 + // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9 + + SlotIndex FMAIdx = LIS->getInstructionIndex(MI); + + VNInfo *AddendValNo = + LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); + MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); + + // The addend and this instruction must be in the same block. + + if (!AddendMI || AddendMI->getParent() != MI->getParent()) + continue; + + // The addend must be a full copy within the same register class. + + if (!AddendMI->isFullCopy()) + continue; + + unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { + if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != + MRI.getRegClass(AddendSrcReg)) + continue; + } else { + // If AddendSrcReg is a physical register, make sure the destination + // register class contains it. + if (!MRI.getRegClass(AddendMI->getOperand(0).getReg()) + ->contains(AddendSrcReg)) + continue; + } + + // In theory, there could be other uses of the addend copy before this + // fma. We could deal with this, but that would require additional + // logic below and I suspect it will not occur in any relevant + // situations. + bool OtherUsers = false; + for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); + J != JE; --J) + if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) { + OtherUsers = true; + break; + } + + if (OtherUsers) + continue; + + // Find one of the product operands that is killed by this instruction.
+ + unsigned KilledProdOp = 0, OtherProdOp = 0; + if (LIS->getInterval(MI->getOperand(2).getReg()) + .Query(FMAIdx).isKill()) { + KilledProdOp = 2; + OtherProdOp = 3; + } else if (LIS->getInterval(MI->getOperand(3).getReg()) + .Query(FMAIdx).isKill()) { + KilledProdOp = 3; + OtherProdOp = 2; + } + + // If there are no killed product operands, then this transformation is + // likely not profitable. + if (!KilledProdOp) + continue; + + // In order to replace the addend here with the source of the copy, + // it must still be live here. + if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx)) + continue; + + // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. + + unsigned AddReg = AddendMI->getOperand(1).getReg(); + unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg(); + unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg(); + + unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); + unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg(); + unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg(); + + bool AddRegKill = AddendMI->getOperand(1).isKill(); + bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill(); + bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill(); + + bool AddRegUndef = AddendMI->getOperand(1).isUndef(); + bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef(); + bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef(); + + unsigned OldFMAReg = MI->getOperand(0).getReg(); + + assert(OldFMAReg == AddendMI->getOperand(0).getReg() && + "Addend copy not tied to old FMA output!"); + + DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;); + + MI->getOperand(0).setReg(KilledProdReg); + MI->getOperand(1).setReg(KilledProdReg); + MI->getOperand(3).setReg(AddReg); + MI->getOperand(2).setReg(OtherProdReg); + + MI->getOperand(0).setSubReg(KilledProdSubReg); + MI->getOperand(1).setSubReg(KilledProdSubReg); + MI->getOperand(3).setSubReg(AddSubReg); + MI->getOperand(2).setSubReg(OtherProdSubReg); + + MI->getOperand(1).setIsKill(KilledProdRegKill); + MI->getOperand(3).setIsKill(AddRegKill); + MI->getOperand(2).setIsKill(OtherProdRegKill); + + MI->getOperand(1).setIsUndef(KilledProdRegUndef); + MI->getOperand(3).setIsUndef(AddRegUndef); + MI->getOperand(2).setIsUndef(OtherProdRegUndef); + + MI->setDesc(TII->get(AltOpc)); + + DEBUG(dbgs() << " -> " << *MI); + + // The killed product operand was killed here, so we can reuse it now + // for the result of the fma. + + LiveInterval &FMAInt = LIS->getInterval(OldFMAReg); + VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot()); + for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end(); + UI != UE;) { + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); + ++UI; + + // Don't replace the result register of the copy we're about to erase. + if (UseMI == AddendMI) + continue; + + UseMO.setReg(KilledProdReg); + UseMO.setSubReg(KilledProdSubReg); + } + + // Extend the live intervals of the killed product operand to hold the + // fma result. + + LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg); + for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end(); + AI != AE; ++AI) { + // Don't add the segment that corresponds to the original copy. 
+ if (AI->valno == AddendValNo) + continue; + + VNInfo *NewFMAValNo = + NewFMAInt.getNextValue(AI->start, + LIS->getVNInfoAllocator()); + + NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end, + NewFMAValNo)); + } + DEBUG(dbgs() << " extended: " << NewFMAInt << '\n'); + + FMAInt.removeValNo(FMAValNo); + DEBUG(dbgs() << " trimmed: " << FMAInt << '\n'); + + // Remove the (now unused) copy. + + DEBUG(dbgs() << " removing: " << *AddendMI << '\n'); + LIS->RemoveMachineInstrFromMaps(AddendMI); + AddendMI->eraseFromParent(); + + Changed = true; + } + + return Changed; + } + +public: + bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + // If we don't have VSX then go ahead and return without doing + // anything. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; + + LIS = &getAnalysis<LiveIntervals>(); + + TII = TM->getInstrInfo(); + + bool Changed = false; + + if (DisableVSXFMAMutate) + return Changed; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, + "PowerPC VSX FMA Mutation", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE, + "PowerPC VSX FMA Mutation", false, false) + +char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID; + +char PPCVSXFMAMutate::ID = 0; +FunctionPass* +llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-vsx-copy" + +namespace llvm { + void initializePPCVSXCopyPass(PassRegistry&); +} + +namespace { + // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers + // (Altivec and scalar floating-point registers), we need to transform the + // copies into subregister copies with other restrictions. 
+ struct PPCVSXCopy : public MachineFunctionPass { + static char ID; + PPCVSXCopy() : MachineFunctionPass(ID) { + initializePPCVSXCopyPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + + bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, + MachineRegisterInfo &MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + return RC->hasSubClassEq(MRI.getRegClass(Reg)); + } else if (RC->contains(Reg)) { + return true; + } + + return false; + } + + bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI); + } + + bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI); + } + + bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); + } + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) { + MachineInstr *MI = I; + if (!MI->isFullCopy()) + continue; + + MachineOperand &DstMO = MI->getOperand(0); + MachineOperand &SrcMO = MI->getOperand(1); + + if ( IsVSReg(DstMO.getReg(), MRI) && + !IsVSReg(SrcMO.getReg(), MRI)) { + // This is a copy *to* a VSX register from a non-VSX register. + Changed = true; + + const TargetRegisterClass *SrcRC = + IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass : + &PPC::VSLRCRegClass; + assert((IsF8Reg(SrcMO.getReg(), MRI) || + IsVRReg(SrcMO.getReg(), MRI)) && + "Unknown source for a VSX copy"); + + unsigned NewVReg = MRI.createVirtualRegister(SrcRC); + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) + .addImm(1) // add 1, not 0, because there is no implicit clearing + // of the high bits. + .addOperand(SrcMO) + .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 : + PPC::sub_64); + + // The source of the original copy is now the new virtual register. + SrcMO.setReg(NewVReg); + } else if (!IsVSReg(DstMO.getReg(), MRI) && + IsVSReg(SrcMO.getReg(), MRI)) { + // This is a copy *from* a VSX register to a non-VSX register. + Changed = true; + + const TargetRegisterClass *DstRC = + IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass : + &PPC::VSLRCRegClass; + assert((IsF8Reg(DstMO.getReg(), MRI) || + IsVRReg(DstMO.getReg(), MRI)) && + "Unknown destination for a VSX copy"); + + // Copy the VSX value into a new VSX register of the correct subclass. + unsigned NewVReg = MRI.createVirtualRegister(DstRC); + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg) + .addOperand(SrcMO); + + // Transform the original copy into a subregister extraction copy. + SrcMO.setReg(NewVReg); + SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 : + PPC::sub_64); + } + } + + return Changed; + } + +public: + bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + // If we don't have VSX on the subtarget, don't do anything. 
+ if (!TM->getSubtargetImpl()->hasVSX()) + return false; + TII = TM->getInstrInfo(); + + bool Changed = false; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE, + "PowerPC VSX Copy Legalization", false, false) + +char PPCVSXCopy::ID = 0; +FunctionPass* +llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-vsx-copy-cleanup" + +namespace llvm { + void initializePPCVSXCopyCleanupPass(PassRegistry&); +} + +namespace { + // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX + // registers (mostly because the ABI code still places all values into the + // "traditional" floating-point and vector registers). Remove them here. + struct PPCVSXCopyCleanup : public MachineFunctionPass { + static char ID; + PPCVSXCopyCleanup() : MachineFunctionPass(ID) { + initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + SmallVector<MachineInstr *, 4> ToDelete; + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) { + MachineInstr *MI = I; + if (MI->getOpcode() == PPC::XXLOR && + MI->getOperand(0).getReg() == MI->getOperand(1).getReg() && + MI->getOperand(0).getReg() == MI->getOperand(2).getReg()) + ToDelete.push_back(MI); + } + + if (!ToDelete.empty()) + Changed = true; + + for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) { + DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]); + ToDelete[i]->eraseFromParent(); + } + + return Changed; + } + +public: + bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + // If we don't have VSX don't bother doing anything here. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; + TII = TM->getInstrInfo(); + + bool Changed = false; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE, + "PowerPC VSX Copy Cleanup", false, false) + +char PPCVSXCopyCleanup::ID = 0; +FunctionPass* +llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); } + +#undef DEBUG_TYPE #define DEBUG_TYPE "ppc-early-ret" STATISTIC(NumBCLR, "Number of early conditional returns"); STATISTIC(NumBLR, "Number of early returns"); @@ -1428,7 +2125,7 @@ protected: if (J->getOpcode() == PPC::B) { if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the - // branch with a blr. + // branch with a blr. BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR)); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); @@ -1440,7 +2137,7 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. 
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR)) + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) .addImm(J->getOperand(0).getImm()) .addReg(J->getOperand(1).getReg()); MachineBasicBlock::iterator K = J--; @@ -1449,6 +2146,20 @@ protected: ++NumBCLR; continue; } + } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) { + if (J->getOperand(1).getMBB() == &ReturnMBB) { + // This is a conditional branch to the return. Replace the branch + // with a bclr. + BuildMI(**PI, J, J->getDebugLoc(), + TII->get(J->getOpcode() == PPC::BC ? + PPC::BCLR : PPC::BCLRn)) + .addReg(J->getOperand(0).getReg()); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBCLR; + continue; + } } else if (J->isBranch()) { if (J->isIndirectBranch()) { if (ReturnMBB.hasAddressTaken()) @@ -1470,7 +2181,7 @@ protected: if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB)) OtherReference = true; - // Predecessors are stored in a vector and can't be removed here. + // Predecessors are stored in a vector and can't be removed here. if (!OtherReference && BlockChanged) { PredToRemove.push_back(*PI); } @@ -1501,7 +2212,7 @@ protected: } public: - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); TII = TM->getInstrInfo(); @@ -1513,7 +2224,7 @@ public: return Changed; for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + MachineBasicBlock &B = *I++; if (processBlock(B)) Changed = true; } @@ -1521,7 +2232,7 @@ public: return Changed; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -1533,4 +2244,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, char PPCEarlyReturn::ID = 0; FunctionPass* llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } - diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index f140c41a2a89..83f14c6cf214 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -65,7 +65,7 @@ enum PPC970_Unit { class PPCInstrInfo : public PPCGenInstrInfo { - PPCTargetMachine &TM; + PPCSubtarget &Subtarget; const PPCRegisterInfo RI; bool StoreRegToStackSlot(MachineFunction &MF, @@ -80,142 +80,154 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool &NonRI, bool &SpillsVRS) const; virtual void anchor(); public: - explicit PPCInstrInfo(PPCTargetMachine &TM); + explicit PPCInstrInfo(PPCSubtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
/// - virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + const PPCRegisterInfo &getRegisterInfo() const { return RI; } ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const; + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const; + const ScheduleDAG *DAG) const override; + + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const override; + int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const override { + return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, + UseNode, UseIdx); + } bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SubIdx) const; + unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + int &FrameIndex) const override; unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + int &FrameIndex) const override; // commuteInstruction - We can commute rlwimi instructions, but only if the // rotate amt is zero. We also have to munge the immediates a bit. - virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override; + + bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const override; - virtual void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const override; // Select analysis. 
- virtual bool canInsertSelect(const MachineBasicBlock&, - const SmallVectorImpl<MachineOperand> &Cond, - unsigned, unsigned, int&, int&, int&) const; - virtual void insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - - virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const; + bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned, unsigned, int&, int&, int&) const override; + void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const override; + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + + bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const override; // If conversion by predication (only supported by some branch instructions). // All of the profitability checks always return true; it is always // profitable to use the predicated branches. 
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const override { return true; } - virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumT, unsigned ExtraT, - MachineBasicBlock &FMBB, - unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const; + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const override; - virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - const BranchProbability - &Probability) const { + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + const BranchProbability + &Probability) const override { return true; } - virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const { + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const override { return false; } // Predication support. - bool isPredicated(const MachineInstr *MI) const; + bool isPredicated(const MachineInstr *MI) const override; - virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + bool isUnpredicatedTerminator(const MachineInstr *MI) const override; - virtual bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const override; - virtual bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const; + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const override; - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const override; - virtual bool isPredicable(MachineInstr *MI) const; + bool isPredicable(MachineInstr *MI) const override; // Comparison optimization. - virtual bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const; + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; - virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, - const MachineRegisterInfo *MRI) const; + bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// - virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index fc29c69642bf..42b740f4fa46 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; +def tocentry32 : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes.
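The GetInstSizeInBytes() rewrite (declared just above, implemented in the PPCInstrInfo.cpp hunk earlier) replaces a hand-maintained opcode switch with the size recorded in each instruction's MCInstrDesc, keeping only the inline-asm special case. A minimal sketch of a caller under that contract follows; getBlockSizeInBytes is an illustrative helper, not code from this commit:

  static unsigned getBlockSizeInBytes(const PPCInstrInfo &TII,
                                      const MachineBasicBlock &MBB) {
    unsigned Size = 0;
    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I)
      Size += TII.GetInstSizeInBytes(&*I); // 0 for labels/DBG_VALUE, 8 for
                                           // BL8_NOP, 4 for ordinary insts.
    return Size;
  }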
@@ -99,6 +102,8 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; +def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; + def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, [SDNPMayLoad]>; @@ -139,9 +144,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPSideEffect, SDNPInGlue, SDNPOutGlue]>; -def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, - SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -288,6 +290,12 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{ + // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit + // zero extended field. + return isUInt<32>(Imm); +}]>; + // Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require // restricted memrix (4-aligned) constants are alignment sensitive. If these // offsets are hidden behind TOC entries then the values of the lower-order @@ -404,6 +412,14 @@ def crrc : RegisterOperand<CRRC> { let ParserMatchClass = PPCRegCRRCAsmOperand; } +def PPCU2ImmAsmOperand : AsmOperandClass { + let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; + let RenderMethod = "addImmOperands"; +} +def u2imm : Operand<i32> { + let PrintMethod = "printU2ImmOperand"; + let ParserMatchClass = PPCU2ImmAsmOperand; +} def PPCS5ImmAsmOperand : AsmOperandClass { let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; let RenderMethod = "addImmOperands"; @@ -411,6 +427,7 @@ def PPCS5ImmAsmOperand : AsmOperandClass { def s5imm : Operand<i32> { let PrintMethod = "printS5ImmOperand"; let ParserMatchClass = PPCS5ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<5>"; } def PPCU5ImmAsmOperand : AsmOperandClass { let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; let RenderMethod = "addImmOperands"; @@ -419,6 +436,7 @@ def PPCU5ImmAsmOperand : AsmOperandClass { def u5imm : Operand<i32> { let PrintMethod = "printU5ImmOperand"; let ParserMatchClass = PPCU5ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<5>"; } def PPCU6ImmAsmOperand : AsmOperandClass { let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; let RenderMethod = "addImmOperands"; @@ -427,6 +445,7 @@ def PPCU6ImmAsmOperand : AsmOperandClass { def u6imm : Operand<i32> { let PrintMethod = "printU6ImmOperand"; let ParserMatchClass = PPCU6ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<6>"; } def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; @@ -436,6 +455,7 @@ def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def PPCU16ImmAsmOperand : AsmOperandClass { let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; @@ -445,6 +465,7 @@ def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let
PredicateMethod = "isS17Imm"; @@ -457,6 +478,7 @@ def s17imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; @@ -502,6 +524,7 @@ def PPCCRBitMaskOperand : AsmOperandClass { def crbitm: Operand<i8> { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let DecoderMethod = "decodeCRBitMOperand"; let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands @@ -539,6 +562,7 @@ def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; + let DecoderMethod = "decodeMemRIOperands"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; @@ -548,6 +572,7 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; + let DecoderMethod = "decodeMemRIXOperands"; } // A single-register address. This is used with the SjLj @@ -555,6 +580,14 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. def memr : Operand<iPTR> { let MIOperandInfo = (ops ptr_rc:$ptrreg); } +def PPCTLSRegOperand : AsmOperandClass { + let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; + let RenderMethod = "addTLSRegOperands"; +} +def tlsreg32 : Operand<i32> { + let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; +} // PowerPC Predicate operand. def pred : Operand<OtherVT> { @@ -577,10 +610,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; -def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; -def IsBookE : Predicate<"PPCSubTarget.isBookE()">; -def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">; +def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; +def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; +def IsBookE : Predicate<"PPCSubTarget->isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -614,20 +647,6 @@ multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, } } -multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, - string asmbase, string asmstr, InstrItinClass itin, - list<dag> pattern> { - let BaseName = asmbase in { - def NAME : XForm_10<opcode, xo, OOL, IOL, - !strconcat(asmbase, !strconcat(" ", asmstr)), itin, - pattern>, RecFormRel; - let Defs = [CR0] in - def o : XForm_10<opcode, xo, OOL, IOL, - !strconcat(asmbase, !strconcat(". ", asmstr)), itin, - []>, isDOT, RecFormRel; - } -} - multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -888,30 +907,63 @@ let usesCustomInserter = 1, // Expanded after instruction selection. def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. 
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond, + gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", + [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; + def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", + [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; + def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, + f4rc:$T, f4rc:$F), "#SELECT_F4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; + def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, + f8rc:$T, f8rc:$F), "#SELECT_F8", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + vrrc:$T, vrrc:$F), "#SELECT_VRRC", + [(set v4i32:$dst, + (select i1:$cond, v4i32:$T, v4i32:$F))]>; } // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -let mayStore = 1 in +let mayStore = 1 in { def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; +def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F), + "#SPILL_CRBIT", []>; +} // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. -let mayLoad = 1 in +let mayLoad = 1 in { def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; +def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F), + "#RESTORE_CRBIT", []>; +} let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { - def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>; - let isCodeGenOnly = 1 in - def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>; + let isCodeGenOnly = 1 in { + def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>; + def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>; + } } } @@ -922,10 +974,10 @@ let Defs = [LR] in let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, [(br bb:$dst)]>; def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), - "ba $dst", BrB, []>; + "ba $dst", IIC_BrB, []>; } // BCC represents an arbitrary conditional branch on a predicate. 
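The hunk below renames the predicate-based conditional return to BCCLR and adds true single-CR-bit branches (BC/BCn) that pattern-match brcond on an i1 condition. Combined with the PPCEarlyReturn changes earlier in this commit, the branch-to-return folding reduces to an opcode map; a hedged sketch, where getEarlyReturnOpcode is an invented name for illustration:

  static unsigned getEarlyReturnOpcode(unsigned BranchOpc) {
    switch (BranchOpc) {
    case PPC::B:   return PPC::BLR;   // unconditional branch -> blr
    case PPC::BCC: return PPC::BCCLR; // predicate branch -> conditional blr
    case PPC::BC:  return PPC::BCLR;  // branch if CR bit set -> bclr 12, $bi, 0
    case PPC::BCn: return PPC::BCLRn; // branch if CR bit clear -> bclr 4, $bi, 0
    default:       return 0;          // not a candidate for early return
    }
  }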
@@ -939,23 +991,39 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; let isReturn = 1, Uses = [LR, RM] in - def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), - "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>; + def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; + } + + let isCodeGenOnly = 1 in { + let Pattern = [(brcond i1:$bi, bb:$dst)] in + def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 12, $bi, $dst">; + + let Pattern = [(brcond (not i1:$bi), bb:$dst)] in + def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 4, $bi, $dst">; + + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), + "bclr 12, $bi, 0", IIC_BrB, []>; + def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), + "bclr 4, $bi, 0", IIC_BrB, []>; } let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), - "bdzlr+", BrB, []>; + "bdzlr+", IIC_BrB, []>; def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), - "bdnzlr+", BrB, []>; + "bdnzlr+", IIC_BrB, []>; def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), - "bdzlr-", BrB, []>; + "bdzlr-", IIC_BrB, []>; def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), - "bdnzlr-", BrB, []>; + "bdnzlr-", IIC_BrB, []>; } let Defs = [CTR], Uses = [CTR] in { @@ -998,33 +1066,54 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", IIC_BrB, []>; // See Pat patterns below. 
def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; + + def BCL : BForm_4<16, 12, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 12, $bi, $dst">; + def BCLn : BForm_4<16, 4, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 4, $bi, $dst">; } } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; - let isCodeGenOnly = 1 in - def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>; + let isCodeGenOnly = 1 in { + def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>; + def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>; + } } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), - "blrl", BrB, []>; + "blrl", IIC_BrB, []>; - let isCodeGenOnly = 1 in - def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), - "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>; + let isCodeGenOnly = 1 in { + def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), + "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi), + "bclrl 12, $bi, 0", IIC_BrB, []>; + def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi), + "bclrl 4, $bi, 0", IIC_BrB, []>; + } } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), @@ -1054,17 +1143,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), - "bdzlrl", BrB, []>; + "bdzlrl", IIC_BrB, []>; def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), - "bdnzlrl", BrB, []>; + "bdnzlrl", IIC_BrB, []>; def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), - "bdzlrl+", BrB, []>; + "bdzlrl+", IIC_BrB, []>; def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), - "bdnzlrl+", BrB, []>; + "bdnzlrl+", IIC_BrB, []>; def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), - "bdzlrl-", BrB, []>; + "bdzlrl-", IIC_BrB, []>; def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), - "bdnzlrl-", BrB, []>; + "bdnzlrl-", IIC_BrB, []>; } } @@ -1090,19 +1179,19 @@ let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in -def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In32BitMode]>; +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In32BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; let isBranch = 1, isTerminator = 1, hasCtrlDep 
= 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; } @@ -1128,33 +1217,33 @@ let isBranch = 1, isTerminator = 1 in { // System call. let PPC970_Unit = 7 in { def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), - "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; + "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>; } // DCB* instructions. -def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), - "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, +def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", + IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), - "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, +def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst", + IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), - "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, +def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst", + IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), - "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, +def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst", + IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), - "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, +def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst", + IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), - "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, +def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst", + IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), - "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, +def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst", + IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), - "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, +def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", + IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), @@ -1242,26 +1331,26 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src", LdStLWARX, + "lwarx $rD, $src", IIC_LdStLWARX, [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", LdStSTWCX, + "stwcx. 
$rS, $dst", IIC_LdStSTWCX, [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), - "twi $to, $rA, $imm", IntTrapW, []>; + "twi $to, $rA, $imm", IIC_IntTrapW, []>; def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), - "tw $to, $rA, $rB", IntTrapW, []>; + "tw $to, $rA, $rB", IIC_IntTrapW, []>; def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), - "tdi $to, $rA, $imm", IntTrapD, []>; + "tdi $to, $rA, $imm", IIC_IntTrapD, []>; def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), - "td $to, $rA, $rB", IntTrapD, []>; + "td $to, $rA, $rB", IIC_IntTrapD, []>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -1270,56 +1359,56 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFD, + "lfs $rD, $src", IIC_LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), - "lfd $rD, $src", LdStLFD, + "lfd $rD, $src", IIC_LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfsu $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfdu $rD, $addr", LdStLFDU, + "lfdu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -1327,37 +1416,37 @@ def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLFDU, + "lfsux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLFDU, + "lfdux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -1367,45 +1456,45 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, + "lhzx $rD, $src", 
IIC_LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStLoad, + "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStLoad, + "lwbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFD, + "lfsx $frD, $src", IIC_LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFD, + "lfdx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src), - "lfiwax $frD, $src", LdStLFD, + "lfiwax $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), - "lfiwzx $frD, $src", LdStLFD, + "lfiwzx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } // Load Multiple def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), - "lmw $rD, $src", LdStLMW, []>; + "lmw $rD, $src", IIC_LdStLMW, []>; //===----------------------------------------------------------------------===// // PPC32 Store Instructions. @@ -1414,38 +1503,38 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), - "stfs $rS, $dst", LdStSTFD, + "stfs $rS, $dst", IIC_LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), - "stfd $rS, $dst", LdStSTFD, + "stfd $rS, $dst", IIC_LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), - "stfsu $rS, $dst", LdStSTFDU, []>, + "stfsu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), - "stfdu $rS, $dst", LdStSTFDU, []>, + "stfdu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } @@ -1466,59 +1555,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), // Indexed (r+r) Stores. let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStStore, + "sthbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStStore, + "stwbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStSTFD, + "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStSTFD, + "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStSTFD, + "stfdx $frS, $dst", IIC_LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), - "stfsux $rS, $dst", LdStSTFDU, []>, + "stfsux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), - "stfdux $rS, $dst", LdStSTFDU, []>, + "stfdux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } @@ -1539,14 +1628,14 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // Store Multiple def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), - "stmw $rS, $dst", LdStLMW, []>; + "stmw $rS, $dst", IIC_LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), - "sync $L", LdStSync, []>, Requires<[IsNotBookE]>; + "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>; let isCodeGenOnly = 1 in { def MSYNC : XForm_24_sync<31, 598, (outs), (ins), - "msync", LdStSync, []>, Requires<[IsBookE]> { + "msync", IIC_LdStSync, []>, Requires<[IsBookE]> { let L = 0; } } @@ -1560,41 +1649,41 @@ def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>; let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic. $rD, $rA, $imm", IntGeneral, + "addic. 
$rD, $rA, $imm", IIC_IntGeneral, []>, isDOT, RecFormRel; } def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), - "la $rD, $sym($rA)", IntGeneral, + "la $rD, $sym($rA)", IIC_IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm32SExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } @@ -1602,154 +1691,170 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CR0] in { def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, + "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, + "andis. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; -def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, + +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>; +let isCodeGenOnly = 1 in { +// The POWER6 and POWER7 have special group-terminating nops. 
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), + "ori 1, 1, 0", IIC_IntSimple, []>; +def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), + "ori 2, 2, 0", IIC_IntSimple, []>; +} + let isCompare = 1, neverHasSideEffects = 1 in { def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), - "cmpwi $crD, $rA, $imm", IntCompare>; + "cmpwi $crD, $rA, $imm", IIC_IntCompare>; def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), - "cmplwi $dst, $src1, $src2", IntCompare>; + "cmplwi $dst, $src1, $src2", IIC_IntCompare>; } } let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. +let isCommutable = 1 in { defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, i32:$rB))]>; +} // isCommutable defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, i32:$rB))]>; defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; +} // isCommutable defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; +} // isCommutable defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "slw", "$rA, $rS, $rB", IntGeneral, + "slw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "srw", "$rA, $rS, $rB", IntGeneral, + "srw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "sraw", "$rA, $rS, $rB", IntShift, + "sraw", "$rA, $rS, $rB", IIC_IntShift, [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } let PPC970_Unit = 1 in { // FXU Operations. 
let neverHasSideEffects = 1 in { defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), - "srawi", "$rA, $rS, $SH", IntShift, + "srawi", "$rA, $rS, $SH", IIC_IntShift, [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), - "cntlzw", "$rA, $rS", IntGeneral, + "cntlzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctlz i32:$rS))]>; defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; } let isCompare = 1, neverHasSideEffects = 1 in { def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmpw $crD, $rA, $rB", IntCompare>; + "cmpw $crD, $rA, $rB", IIC_IntCompare>; def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmplw $crD, $rA, $rB", IntCompare>; + "cmplw $crD, $rA, $rB", IIC_IntCompare>; } } let PPC970_Unit = 3 in { // FPU Operations. //def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), -// "fcmpo $crD, $fA, $fB", FPCompare>; +// "fcmpo $crD, $fA, $fB", IIC_FPCompare>; let isCompare = 1, neverHasSideEffects = 1 in { def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; } let Uses = [RM] in { let neverHasSideEffects = 1 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiw", "$frD, $frB", FPGeneral, + "fctiw", "$frD, $frB", IIC_FPGeneral, []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwz", "$frD, $frB", FPGeneral, + "fctiwz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), - "frsp", "$frD, $frB", FPGeneral, + "frsp", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (frnd f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (frnd f32:$frB))]>; } let neverHasSideEffects = 1 in { - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fceil f64:$frB))]>; defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fceil f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ftrunc f64:$frB))]>; defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins 
f4rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ftrunc f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ffloor f64:$frB))]>; defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ffloor f32:$frB))]>; defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), - "fsqrt", "$frD, $frB", FPSqrt, + "fsqrt", "$frD, $frB", IIC_FPSqrtD, [(set f64:$frD, (fsqrt f64:$frB))]>; defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), - "fsqrts", "$frD, $frB", FPSqrt, + "fsqrts", "$frD, $frB", IIC_FPSqrtS, [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1761,54 +1866,54 @@ let Uses = [RM] in { /// sneak into a d-group with a store). let neverHasSideEffects = 1 in defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), - "fmr", "$frD, $frB", FPGeneral, + "fmr", "$frD, $frB", IIC_FPGeneral, []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fabs f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fabs f64:$frB))]>; defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg (fabs f32:$frB)))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg (fabs f64:$frB)))]>; defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; // Reciprocal estimates. 
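// fre/frsqrte only guarantee a few bits of precision, so codegen is expected
// to refine the estimate with Newton-Raphson iterations. As a sketch, one
// refinement step for 1/d starting from estimate x0 is:
//   x1 = x0 + x0*(1 - d*x0)    (typically an fnmsub feeding an fmadd)
// The number of steps needed depends on the target's estimate accuracy.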
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), - "fre", "$frD, $frB", FPGeneral, + "fre", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfre f64:$frB))]>; defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), - "fres", "$frD, $frB", FPGeneral, + "fres", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfre f32:$frB))]>; defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), - "frsqrte", "$frD, $frB", FPGeneral, + "frsqrte", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), - "frsqrtes", "$frD, $frB", FPGeneral, + "frsqrtes", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } @@ -1816,57 +1921,67 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), // let neverHasSideEffects = 1 in def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), - "mcrf $BF, $BFA", BrMCR>, + "mcrf $BF, $BFA", IIC_BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +let isCommutable = 1 in { def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crand $CRD, $CRA, $CRB", BrCR, []>; + "crand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>; def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnand $CRD, $CRA, $CRB", BrCR, []>; + "crnand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>; def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "cror $CRD, $CRA, $CRB", BrCR, []>; + "cror $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>; def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crxor $CRD, $CRA, $CRB", BrCR, []>; + "crxor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>; def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnor $CRD, $CRA, $CRB", BrCR, []>; + "crnor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>; def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "creqv $CRD, $CRA, $CRB", BrCR, []>; + "creqv $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>; +} // isCommutable def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crandc $CRD, $CRA, $CRB", BrCR, []>; + "crandc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>; def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crorc $CRD, $CRA, $CRB", BrCR, []>; + "crorc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), - "creqv $dst, $dst, $dst", BrCR, - []>; + "creqv $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 1)]>; def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), - "crxor $dst, $dst, $dst", BrCR, - []>; + "crxor $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 0)]>; let Defs = [CR1EQ], CRD = 6 in { def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), - "creqv 6, 6, 6", BrCR, + "creqv 6, 6, 6", IIC_BrCR, [(PPCcr6set)]>; def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), - "crxor 6, 6, 6", BrCR, + "crxor 6, 6, 6", IIC_BrCR, [(PPCcr6unset)]>; } } @@ -1875,38 +1990,38 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), - "mfspr $RT, $SPR", 
SprMFSPR>; + "mfspr $RT, $SPR", IIC_SprMFSPR>; def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), - "mtspr $SPR, $RT", SprMTSPR>; + "mtspr $SPR, $RT", IIC_SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), - "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>; + "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), - "mfctr $rT", SprMFSPR>, + "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { let Pattern = [(int_ppc_mtctr i32:$rS)] in def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1915,19 +2030,19 @@ let isCodeGenOnly = 1 in { // like a GPR on the PPC970. As such, copies in and out have the same // performance characteristics as an OR instruction. def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, (outs VRSAVERC:$reg), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1945,20 +2060,20 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), let neverHasSideEffects = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -1972,18 +2087,18 @@ let usesCustomInserter = 1, Uses = [RM] in { // to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. 
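// As a sketch (register choices illustrative), code that temporarily forces
// round-toward-zero saves and restores the FPSCR around the change:
//   mffs f0           (save FPSCR)
//   mtfsb1 31         (RN = 0b01, round toward zero)
//   mtfsb0 30
//   ...               (rounding-sensitive operations)
//   mtfsf 255, f0     (restore FPSCR)
// which is why the instructions below both use and define RM.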
let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, []>, + "mtfsb0 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, []>, + "mtfsb1 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), - "mtfsf $FM, $rT", IntMTFSB0, []>, + "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), - "mffs $rT", IntMFFS, + "mffs $rT", IIC_IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1991,59 +2106,68 @@ let Uses = [RM] in { let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit -// +let isCommutable = 1 in defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +let isCodeGenOnly = 1 in +def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>; +let isCommutable = 1 in defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; + defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IntDivW, + "divw", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IntDivW, + "divwu", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; +let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhw", "$rT, $rA, $rB", IntMulHW, + "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhwu", "$rT, $rA, $rB", IntMulHWU, + "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mullw", "$rT, $rA, $rB", IntMulHW, + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +} // isCommutable defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY] in { +let isCommutable = 1 in defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set 
i32:$rT, (adde i32:$rA, i32:$rB))]>; defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), - "addme", "$rT, $rA", IntGeneral, + "addme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, -1))]>; defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, 0))]>; defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube -1, i32:$rA))]>; defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -2053,90 +2177,96 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), // let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. let Uses = [RM] in { +let isCommutable = 1 in { defm FMADD : AForm_1r<63, 29, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FMADDS : AForm_1r<59, 29, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; defm FMSUB : AForm_1r<63, 28, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; defm FMSUBS : AForm_1r<59, 28, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; defm FNMADD : AForm_1r<63, 31, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; defm FNMADDS : AForm_1r<59, 31, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; defm FNMSUB : AForm_1r<63, 30, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; defm FNMSUBS : AForm_1r<59, 30, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; +} // isCommutable } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
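// fsel itself computes FRT = (FRA >= 0.0) ? FRC : FRB, with the comparison
// value always treated as a double. For example, (PPCfsel f64:$cmp, f32:$t,
// f32:$f) selects between the two singles based on the sign of the double
// $cmp, which is why FSELS below takes an f8rc $FRA but f4rc result/operands.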
-let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FSELD : AForm_1r<63, 23, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FSELS : AForm_1r<63, 23, (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { + let isCommutable = 1 in { defm FADD : AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fadd", "$FRT, $FRA, $FRB", FPAddSub, + "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; defm FADDS : AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fadds", "$FRT, $FRA, $FRB", FPGeneral, + "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + } // isCommutable defm FDIV : AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fdiv", "$FRT, $FRA, $FRB", FPDivD, + "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; defm FDIVS : AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fdivs", "$FRT, $FRA, $FRB", FPDivS, + "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + let isCommutable = 1 in { defm FMUL : AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), - "fmul", "$FRT, $FRA, $FRC", FPFused, + "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; defm FMULS : AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), - "fmuls", "$FRT, $FRA, $FRC", FPGeneral, + "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + } // isCommutable defm FSUB : AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fsub", "$FRT, $FRA, $FRB", FPAddSub, + "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; defm FSUBS : AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fsubs", "$FRT, $FRA, $FRB", FPGeneral, + "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } @@ -2146,7 +2276,7 @@ let PPC970_Unit = 1 in { // FXU Operations. let isSelect = 1 in def ISEL : AForm_4<31, 15, (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } @@ -2157,24 +2287,24 @@ let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. defm RLWIMI : MForm_2r<20, (outs gprc:$rA), (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, - u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate, - []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, RecFormRel; let Defs = [CR0] in def RLWINMo : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm. 
$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } defm RLWNM : MForm_2r<23, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), - "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral, + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, []>; } } // neverHasSideEffects = 1 @@ -2188,8 +2318,10 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. -def NOT : Pat<(not i32:$in), - (NOR $in, $in)>; +def i32not : OutPatFrag<(ops node:$in), + (NOR $in, $in)>; +def : Pat<(not i32:$in), + (i32not $in)>; // ADD an arbitrary immediate. def : Pat<(add i32:$in, imm:$imm), @@ -2260,6 +2392,29 @@ def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), (ADDIS $in, tblockaddress:$g)>; +// Support for thread-local storage. +def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", + [(set i32:$rD, (PPCppc32GOT))]>; + +def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), + "#LDgotTprelL32", + [(set i32:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; +def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), + (ADD4TLS $in, tglobaltlsaddr:$g)>; + +// Support for Position-independent code +def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), + "#LWZtoc", + [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +// Get Global (GOT) Base Register offset, from the word immediately preceding +// the function label. +def GetGBRO: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#GetGBRO", []>; +// Update the Global(GOT) Base Register with the above offset. +def UpdateGBR: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; + + // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. @@ -2315,52 +2470,561 @@ def : Pat<(fcopysign f32:$frB, f64:$frA), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" +include "PPCInstrVSX.td" + +def crnot : OutPatFrag<(ops node:$in), + (CRNOR $in, $in)>; +def : Pat<(not i1:$in), + (crnot $in)>; + +// Patterns for arithmetic i1 operations. +def : Pat<(add i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(sub i1:$a, i1:$b), + (CRXOR $a, $b)>; +def : Pat<(mul i1:$a, i1:$b), + (CRAND $a, $b)>; + +// We're sometimes asked to materialize i1 -1, which is just 1 in this case +// (-1 is used to mean all bits set). +def : Pat<(i1 -1), (CRSET)>; + +// i1 extensions, implemented in terms of isel. +def : Pat<(i32 (zext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i32 (sext i1:$in)), + (SELECT_I4 $in, (LI -1), (LI 0))>; + +def : Pat<(i64 (zext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; +def : Pat<(i64 (sext i1:$in)), + (SELECT_I8 $in, (LI8 -1), (LI8 0))>; + +// FIXME: We should choose either a zext or a sext based on other constants +// already around. +def : Pat<(i32 (anyext i1:$in)), + (SELECT_I4 $in, (LI 1), (LI 0))>; +def : Pat<(i64 (anyext i1:$in)), + (SELECT_I8 $in, (LI8 1), (LI8 0))>; + +// match setcc on i1 variables. 
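+// With i1 true = 1, these follow from the two-row truth table; for example,
+// s1 < s2 holds only for (s1,s2) = (0,1), i.e. (~s1 & s2), which is
+// (CRANDC $s2, $s1) given crandc's (and a, (not b)) pattern above.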
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), + (CREQV $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), + (CRXOR $s1, $s2)>; + +// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE, +// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for +// floating-point types. + +multiclass CRNotPat<dag pattern, dag result> { + def : Pat<pattern, (crnot result)>; + def : Pat<(not pattern), result>; + + // We can also fold the crnot into an extension: + def : Pat<(i32 (zext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + def : Pat<(i32 (sext pattern)), + (SELECT_I4 result, (LI 0), (LI -1))>; + + // We can also fold the crnot into an extension: + def : Pat<(i64 (zext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; + def : Pat<(i64 (sext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 -1))>; + + // FIXME: We should choose either a zext or a sext based on other constants + // already around. + def : Pat<(i32 (anyext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + + def : Pat<(i64 (anyext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; +} + +// FIXME: Because of what seems like a bug in TableGen's type-inference code, +// we need to write imm:$imm in the output patterns below, not just $imm, or +// else the resulting matcher will not correctly add the immediate operand +// (making it a register operand instead). + +// extended SETCC. +multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag, + OutPatFrag rfrag, OutPatFrag rfrag8> { + def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; + + def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; +} + +// Note that we do all inversions below with i(32|64)not, instead of using +// (xori x, 1) because on the A2 nor has single-cycle latency while xori +// has 2-cycle latency. 
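+// For example, the SETNE-on-zero expansion below comes out as (sketch):
+//   cntlzw r0, r3              (32 iff r3 == 0)
+//   nor r0, r0, r0             (i32not, single-cycle on the A2)
+//   rlwinm r3, r0, 27, 31, 31  (extract bit 5 of the complemented count)
+// rather than extracting first and flipping the result with xori.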
+ +defm : ExtSetCCPat<SETEQ, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (CNTLZW $in), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (CNTLZD $in), 58, 63)> >; + +defm : ExtSetCCPat<SETNE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not (CNTLZD $in)), 58, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +// SETCC for i32. 
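+// cmpwi/cmplwi/cmpw/cmplw set the lt, gt, eq (and so) bits of a CR field;
+// each pattern below copies one of those bits out as the i1 result via
+// EXTRACT_SUBREG. Sketch for (setcc i32:$x, 7, SETLT):
+//   cmpwi cr0, r3, 7
+// and the i1 result is cr0's lt bit (sub_lt).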
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpw cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmplwi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// SETCC for i64. 
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpd cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmpldi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// SETCC for f32. 
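+// fcmpu sets exactly one of lt, gt, eq, un; NaN operands set un and leave
+// the other three clear. That is why SETLT and SETOLT can share sub_lt
+// below, and why SETUO/SETO reduce to sub_un and its crnot.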
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// SETCC for f64. +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// match select on i1 variables: +def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), + (CROR (CRAND $cond , $tval), + (CRAND (crnot $cond), $fval))>; + +// match selectcc on i1 variables: +// select (lhs == rhs), tval, fval is: +// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), + (CROR (CRAND (CREQV $lhs, $rhs), $tval), + (CRAND (CRXOR $lhs, $rhs), 
$fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), + (CROR (CRAND (CREQV $lhs, $rhs), $fval), + (CRAND (CRXOR $lhs, $rhs), $tval))>; + +// match selectcc on i1 variables with non-i1 output. +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), + (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), + (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), + (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), + (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i32 (selectcc 
i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), + (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), + (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; +let usesCustomInserter = 1 in { +def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_EQ_BIT", + [(set i1:$dst, (trunc (not i32:$in)))]>; +def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_GT_BIT", + [(set i1:$dst, (trunc i32:$in))]>; + +def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_EQ_BIT8", + [(set i1:$dst, (trunc (not i64:$in)))]>; +def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_GT_BIT8", + [(set i1:$dst, (trunc i64:$in))]>; +} + +def : Pat<(i1 (not (trunc i32:$in))), + (ANDIo_1_EQ_BIT $in)>; +def : Pat<(i1 (not (trunc i64:$in))), + (ANDIo_1_EQ_BIT8 $in)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only // def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), - "isync", SprISYNC, []>; + "isync", IIC_SprISYNC, []>; def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), - "icbi $src", LdStICBI, []>; + "icbi $src", IIC_LdStICBI, []>; def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), - "eieio", LdStLoad, []>; + "eieio", IIC_LdStLoad, []>; def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), - "wait $L", LdStLoad, []>; + "wait $L", IIC_LdStLoad, []>; def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsr $RS, $L", SprMTMSR>; + "mtmsr $RS, $L", IIC_SprMTMSR>; def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), - "mfmsr $RT", SprMFMSR, []>; + "mfmsr $RT", IIC_SprMFMSR, []>; def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsrd $RS, $L", SprMTMSRD>; + "mtmsrd $RS, $L", IIC_SprMTMSRD>; def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), - "slbie $RB", SprSLBIE, []>; + "slbie $RB", IIC_SprSLBIE, []>; def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), - "slbmte $RS, $RB", SprSLBMTE, []>; + "slbmte $RS, $RB", IIC_SprSLBMTE, []>; def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), - "slbmfee $RT, $RB", SprSLBMFEE, []>; + "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>; -def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>; +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; def TLBSYNC : XForm_0<31, 566, (outs), (ins), - "tlbsync", SprTLBSYNC, []>; + "tlbsync", IIC_SprTLBSYNC, []>; def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), - "tlbiel $RB", SprTLBIEL, []>; + "tlbiel $RB", IIC_SprTLBIEL, []>; def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), - "tlbie $RB,$RS", SprTLBIE, []>; + "tlbie $RB,$RS", IIC_SprTLBIE, []>; //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases @@ -2576,19 +3240,19 @@ let PPC970_Unit = 7 in { let Defs = [CTR], Uses = 
[CTR, LR, RM] in def gBCLR : XLForm_2<19, 16, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclr $bo, $bi, $bh", BrB, []>; + "bclr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCLRL : XLForm_2<19, 16, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclrl $bo, $bi, $bh", BrB, []>; + "bclrl $bo, $bi, $bh", IIC_BrB, []>; let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCCTR : XLForm_2<19, 528, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bcctr $bo, $bi, $bh", BrB, []>; + "bcctr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCCTRL : XLForm_2<19, 528, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bcctrl $bo, $bi, $bh", BrB, []>; + "bcctrl $bo, $bi, $bh", IIC_BrB, []>; } def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; @@ -2631,14 +3295,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { (BCCA bibo, CR0, abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lr"#pm#" $cc", - (BCLR bibo, crrc:$cc)>; + (BCCLR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lr"#pm, - (BCLR bibo, CR0)>; + (BCCLR bibo, CR0)>; def : InstAlias<"b"#name#"ctr"#pm#" $cc", - (BCCTR bibo, crrc:$cc)>; + (BCCCTR bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctr"#pm, - (BCCTR bibo, CR0)>; + (BCCCTR bibo, CR0)>; def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; @@ -2651,14 +3315,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { (BCCLA bibo, CR0, abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lrl"#pm#" $cc", - (BCLRL bibo, crrc:$cc)>; + (BCCLRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"lrl"#pm, - (BCLRL bibo, CR0)>; + (BCCLRL bibo, CR0)>; def : InstAlias<"b"#name#"ctrl"#pm#" $cc", - (BCCTRL bibo, crrc:$cc)>; + (BCCCTRL bibo, crrc:$cc)>; def : InstAlias<"b"#name#"ctrl"#pm, - (BCCTRL bibo, CR0)>; + (BCCCTRL bibo, CR0)>; } multiclass BranchExtendedMnemonic<string name, int bibo> { defm : BranchExtendedMnemonicPM<name, "", bibo>; @@ -2682,18 +3346,18 @@ def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpli $bf, 1, $rA, $imm", 
(CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>; multiclass TrapExtendedMnemonic<string name, int to> { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td new file mode 100644 index 000000000000..49bcc4876d33 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -0,0 +1,816 @@ +//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the VSX extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +def PPCRegVSRCAsmOperand : AsmOperandClass { + let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsrc : RegisterOperand<VSRC> { + let ParserMatchClass = PPCRegVSRCAsmOperand; +} + +def PPCRegVSFRCAsmOperand : AsmOperandClass { + let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsfrc : RegisterOperand<VSFRC> { + let ParserMatchClass = PPCRegVSFRCAsmOperand; +} + +multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XX3Form_Rc<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>; + let Defs = [CR6] in + def o : XX3Form_Rc<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT; + } +} + +def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; +let Predicates = [HasVSX] in { +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. +let neverHasSideEffects = 1 in { // VSX instructions don't have side effects. 
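+// As a rough sketch of how the XX3Form_Rcr multiclass above expands
+// (following the usual TableGen defm naming), a single line such as
+//   defm XVCMPEQDP : XX3Form_Rcr<60, 99, (outs vsrc:$XT),
+//                                (ins vsrc:$XA, vsrc:$XB),
+//                                "xvcmpeqdp", "$XT, $XA, $XB",
+//                                IIC_VecFPCompare, []>;
+// produces two records: XVCMPEQDP ("xvcmpeqdp $XT, $XA, $XB") and the
+// record form XVCMPEQDPo ("xvcmpeqdp. $XT, $XA, $XB"), the latter marked
+// isDOT and defining CR6.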
+let Uses = [RM] in { + + // Load indexed instructions + let mayLoad = 1, canFoldAsLoad = 1 in { + def LXSDX : XForm_1<31, 588, + (outs vsfrc:$XT), (ins memrr:$src), + "lxsdx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (load xoaddr:$src))]>; + + def LXVD2X : XForm_1<31, 844, + (outs vsrc:$XT), (ins memrr:$src), + "lxvd2x $XT, $src", IIC_LdStLFD, + [(set v2f64:$XT, (load xoaddr:$src))]>; + + def LXVDSX : XForm_1<31, 332, + (outs vsrc:$XT), (ins memrr:$src), + "lxvdsx $XT, $src", IIC_LdStLFD, []>; + + def LXVW4X : XForm_1<31, 780, + (outs vsrc:$XT), (ins memrr:$src), + "lxvw4x $XT, $src", IIC_LdStLFD, []>; + } + + // Store indexed instructions + let mayStore = 1 in { + def STXSDX : XX1Form<31, 716, + (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsdx $XT, $dst", IIC_LdStSTFD, + [(store f64:$XT, xoaddr:$dst)]>; + + def STXVD2X : XX1Form<31, 972, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvd2x $XT, $dst", IIC_LdStSTFD, + [(store v2f64:$XT, xoaddr:$dst)]>; + + def STXVW4X : XX1Form<31, 908, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvw4x $XT, $dst", IIC_LdStSTFD, []>; + } + + // Add/Mul Instructions + let isCommutable = 1 in { + def XSADDDP : XX3Form<60, 32, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsadddp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; + def XSMULDP : XX3Form<60, 48, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmuldp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; + + def XVADDDP : XX3Form<60, 96, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvadddp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; + + def XVADDSP : XX3Form<60, 64, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvaddsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; + + def XVMULDP : XX3Form<60, 112, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmuldp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; + + def XVMULSP : XX3Form<60, 80, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmulsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; + } + + // Subtract Instructions + def XSSUBDP : XX3Form<60, 40, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xssubdp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; + + def XVSUBDP : XX3Form<60, 104, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubdp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; + def XVSUBSP : XX3Form<60, 72, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; + + // FMA Instructions + let BaseName = "XSMADDADP" in { + let isCommutable = 1 in + def XSMADDADP : XX3Form<60, 33, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMDP : XX3Form<60, 41, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBADP" in { + let isCommutable = 1 in + def XSMSUBADP : XX3Form<60, 49, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + 
AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMDP : XX3Form<60, 57, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDADP" in { + let isCommutable = 1 in + def XSNMADDADP : XX3Form<60, 161, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMDP : XX3Form<60, 169, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBADP" in { + let isCommutable = 1 in + def XSNMSUBADP : XX3Form<60, 177, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMDP : XX3Form<60, 185, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDADP" in { + let isCommutable = 1 in + def XVMADDADP : XX3Form<60, 97, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMDP : XX3Form<60, 105, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDASP" in { + let isCommutable = 1 in + def XVMADDASP : XX3Form<60, 65, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMSP : XX3Form<60, 73, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBADP" in { + let isCommutable = 1 in + def XVMSUBADP : XX3Form<60, 113, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMDP : XX3Form<60, 121, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBASP" in { + let isCommutable = 1 in + def XVMSUBASP : XX3Form<60, 81, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMSP : XX3Form<60, 89, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, 
NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDADP" in { + let isCommutable = 1 in + def XVNMADDADP : XX3Form<60, 225, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMDP : XX3Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDASP" in { + let isCommutable = 1 in + def XVNMADDASP : XX3Form<60, 193, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMSP : XX3Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBADP" in { + let isCommutable = 1 in + def XVNMSUBADP : XX3Form<60, 241, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMDP : XX3Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBASP" in { + let isCommutable = 1 in + def XVNMSUBASP : XX3Form<60, 209, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMSP : XX3Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + // Division Instructions + def XSDIVDP : XX3Form<60, 56, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; + def XSSQRTDP : XX2Form<60, 75, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xssqrtdp $XT, $XB", IIC_FPSqrtD, + [(set f64:$XT, (fsqrt f64:$XB))]>; + + def XSREDP : XX2Form<60, 90, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsredp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfre f64:$XB))]>; + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + def XSTDIVDP : XX3Form_1<60, 61, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSTSQRTDP : XX2Form_1<60, 106, + (outs crrc:$crD), (ins vsfrc:$XB), + "xstsqrtdp $crD, $XB", IIC_FPCompare, []>; + + def XVDIVDP : XX3Form<60, 120, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; + def XVDIVSP : XX3Form<60, 88, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; + + def XVSQRTDP : XX2Form<60, 203, + 
(outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtdp $XT, $XB", IIC_FPSqrtD, + [(set v2f64:$XT, (fsqrt v2f64:$XB))]>; + def XVSQRTSP : XX2Form<60, 139, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtsp $XT, $XB", IIC_FPSqrtS, + [(set v4f32:$XT, (fsqrt v4f32:$XB))]>; + + def XVTDIVDP : XX3Form_1<60, 125, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XVTDIVSP : XX3Form_1<60, 93, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; + + def XVTSQRTDP : XX2Form_1<60, 234, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; + def XVTSQRTSP : XX2Form_1<60, 170, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; + + def XVREDP : XX2Form<60, 218, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvredp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfre v2f64:$XB))]>; + def XVRESP : XX2Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvresp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCfre v4f32:$XB))]>; + + def XVRSQRTEDP : XX2Form<60, 202, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtedp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>; + def XVRSQRTESP : XX2Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtesp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>; + + // Compare Instructions + def XSCMPODP : XX3Form_1<60, 43, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPEQSP : XX3Form_Rcr<60, 67, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGEDP : XX3Form_Rcr<60, 115, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGESP : XX3Form_Rcr<60, 83, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGTDP : XX3Form_Rcr<60, 107, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + defm XVCMPGTSP : XX3Form_Rcr<60, 75, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>; + + // Move Instructions + def XSABSDP : XX2Form<60, 345, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fabs f64:$XB))]>; + def XSNABSDP : XX2Form<60, 361, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fabs f64:$XB)))]>; + def XSNEGDP : XX2Form<60, 377, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnegdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg f64:$XB))]>; + def XSCPSGNDP : XX3Form<60, 176, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xscpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; + + def XVABSDP : XX2Form<60, 473, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fabs v2f64:$XB))]>; + + def XVABSSP : XX2Form<60, 409, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fabs v4f32:$XB))]>; + + def XVCPSGNDP : XX3Form<60, 240, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fcopysign v2f64:$XB, 
v2f64:$XA))]>; + def XVCPSGNSP : XX3Form<60, 208, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>; + + def XVNABSDP : XX2Form<60, 489, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>; + def XVNABSSP : XX2Form<60, 425, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>; + + def XVNEGDP : XX2Form<60, 505, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg v2f64:$XB))]>; + def XVNEGSP : XX2Form<60, 441, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg v4f32:$XB))]>; + + // Conversion Instructions + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + def XSCVDPSXDS : XX2Form<60, 344, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctidz f64:$XB))]>; + def XSCVDPSXWS : XX2Form<60, 88, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwz f64:$XB))]>; + def XSCVDPUXDS : XX2Form<60, 328, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiduz f64:$XB))]>; + def XSCVDPUXWS : XX2Form<60, 72, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwuz f64:$XB))]>; + def XSCVSPDP : XX2Form<60, 329, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvspdp $XT, $XB", IIC_VecFP, []>; + def XSCVSXDDP : XX2Form<60, 376, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvsxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfid f64:$XB))]>; + def XSCVUXDDP : XX2Form<60, 360, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvuxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfidu f64:$XB))]>; + + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + def XVCVDPSXDS : XX2Form<60, 472, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>; + def XVCVDPSXWS : XX2Form<60, 216, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxws $XT, $XB", IIC_VecFP, []>; + def XVCVDPUXDS : XX2Form<60, 456, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>; + def XVCVDPUXWS : XX2Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxws $XT, $XB", IIC_VecFP, []>; + + def XVCVSPDP : XX2Form<60, 457, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspdp $XT, $XB", IIC_VecFP, []>; + def XVCVSPSXDS : XX2Form<60, 408, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPSXWS : XX2Form<60, 152, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxws $XT, $XB", IIC_VecFP, []>; + def XVCVSPUXDS : XX2Form<60, 392, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPUXWS : XX2Form<60, 136, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxws $XT, $XB", IIC_VecFP, []>; + def XVCVSXDDP : XX2Form<60, 504, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>; + def XVCVSXDSP : XX2Form<60, 440, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxdsp $XT, $XB", IIC_VecFP, []>; + def XVCVSXWDP : XX2Form<60, 248, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwdp $XT, $XB", IIC_VecFP, []>; + def XVCVSXWSP : XX2Form<60, 184, + (outs vsrc:$XT), (ins 
vsrc:$XB), + "xvcvsxwsp $XT, $XB", IIC_VecFP, []>; + def XVCVUXDDP : XX2Form<60, 488, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>; + def XVCVUXDSP : XX2Form<60, 424, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxdsp $XT, $XB", IIC_VecFP, []>; + def XVCVUXWDP : XX2Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwdp $XT, $XB", IIC_VecFP, []>; + def XVCVUXWSP : XX2Form<60, 168, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwsp $XT, $XB", IIC_VecFP, []>; + + // Rounding Instructions + def XSRDPI : XX2Form<60, 73, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpi $XT, $XB", IIC_VecFP, + [(set f64:$XT, (frnd f64:$XB))]>; + def XSRDPIC : XX2Form<60, 107, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpic $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fnearbyint f64:$XB))]>; + def XSRDPIM : XX2Form<60, 121, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpim $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ffloor f64:$XB))]>; + def XSRDPIP : XX2Form<60, 105, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpip $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fceil f64:$XB))]>; + def XSRDPIZ : XX2Form<60, 89, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpiz $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ftrunc f64:$XB))]>; + + def XVRDPI : XX2Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpi $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (frnd v2f64:$XB))]>; + def XVRDPIC : XX2Form<60, 235, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpic $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; + def XVRDPIM : XX2Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpim $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ffloor v2f64:$XB))]>; + def XVRDPIP : XX2Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpip $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fceil v2f64:$XB))]>; + def XVRDPIZ : XX2Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpiz $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; + + def XVRSPI : XX2Form<60, 137, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspi $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (frnd v4f32:$XB))]>; + def XVRSPIC : XX2Form<60, 171, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspic $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; + def XVRSPIM : XX2Form<60, 185, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspim $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ffloor v4f32:$XB))]>; + def XVRSPIP : XX2Form<60, 169, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspip $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fceil v4f32:$XB))]>; + def XVRSPIZ : XX2Form<60, 153, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspiz $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ftrunc v4f32:$XB))]>; + + // Max/Min Instructions + let isCommutable = 1 in { + def XSMAXDP : XX3Form<60, 160, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + def XSMINDP : XX3Form<60, 168, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmindp $XT, $XA, $XB", IIC_VecFP, []>; + + def XVMAXDP : XX3Form<60, 224, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>; + def XVMINDP : XX3Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmindp $XT, $XA, $XB", IIC_VecFP, []>; + + def XVMAXSP : XX3Form<60, 192, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>; + def XVMINSP : XX3Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvminsp $XT, $XA, $XB", IIC_VecFP, []>; + } // isCommutable +} // Uses = [RM] + + // Logical Instructions + 
let isCommutable = 1 in + def XXLAND : XX3Form<60, 130, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxland $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>; + def XXLANDC : XX3Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlandc $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, + (vnot_ppc v4i32:$XB)))]>; + let isCommutable = 1 in { + def XXLNOR : XX3Form<60, 162, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlnor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA, + v4i32:$XB)))]>; + def XXLOR : XX3Form<60, 146, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>; + let isCodeGenOnly = 1 in + def XXLORf: XX3Form<60, 146, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>; + def XXLXOR : XX3Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlxor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>; + } // isCommutable + + // Permutation Instructions + def XXMRGHW : XX3Form<60, 18, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>; + def XXMRGLW : XX3Form<60, 50, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>; + + def XXPERMDI : XX3Form_2<60, 10, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + def XXSEL : XX4Form<60, 3, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>; + + def XXSLDWI : XX3Form_2<60, 2, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), + "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; + def XXSPLTW : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; +} // neverHasSideEffects +} // AddedComplexity + +def : InstAlias<"xvmovdp $XT, $XB", + (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; +def : InstAlias<"xvmovsp $XT, $XB", + (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; + +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; +def : InstAlias<"xxmrghd $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; +def : InstAlias<"xxmrgld $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; + +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
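+// A worked example of the XXPERMDI idiom used in the aliases above and the
+// patterns below: the two DM bits independently select a doubleword from
+// XA and from XB, so DM = 2 (0b10) with XA = XB swaps the two doublewords
+// (hence "xxswapd"), and extracting element 1 of a v2f64 is that same swap
+// followed by taking the sub_64 subregister of the result.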
+def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; + +def : Pat<(f64 (vector_extract v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; +def : Pat<(f64 (vector_extract v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; + +// Additional fnmsub patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), + (XSNMSUBADP $B, $C, $A)>; +def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), + (XSNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; + +def : Pat<(v2f64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2i64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +// sign extension patterns +// To extend "in place" from v2i32 to v2i64, we have input data like: +// | undef | i32 | undef | i32 | +// but xvcvsxwdp expects the input in big-Endian format: +// | i32 | undef | i32 | undef | +// so we need to shift everything to the left by one i32 (word) before +// the conversion. 
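+// As a concrete example (big-endian word numbering, values illustrative):
+// for input words | undef | 5 | undef | -7 |, XXSLDWI $C, $C, 1 rotates
+// left by one word to give | 5 | undef | -7 | undef |; xvcvsxwdp then
+// converts the first word of each doubleword, yielding | 5.0 | -7.0 |, and
+// for the plain sext_inreg case xvcvdpsxds converts back to | 5 | -7 |.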
+def : Pat<(sext_inreg v2i64:$C, v2i32), + (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>; +def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), + (XVCVSXWDP (XXSLDWI $C, $C, 1))>; + +} // AddedComplexity +} // HasVSX + diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 5e3a48d8bbdb..e5f113a0c030 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "PPCJITInfo.h" #include "PPCRelocations.h" -#include "PPCTargetMachine.h" +#include "PPCSubtarget.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -22,8 +21,15 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "jit" + static TargetJITInfo::JITCompilerFn JITCompilerFunction; +PPCJITInfo::PPCJITInfo(PPCSubtarget &STI) + : Subtarget(STI), is64Bit(STI.isPPC64()) { + useGOT = 0; +} + #define BUILD_ADDIS(RD,RS,IMM16) \ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) #define BUILD_ORI(RD,RS,UIMM16) \ @@ -214,6 +220,10 @@ asm( ".text\n" ".align 2\n" ".globl PPC64CompilationCallback\n" +#if _CALL_ELF == 2 + ".type PPC64CompilationCallback,@function\n" +"PPC64CompilationCallback:\n" +#else ".section \".opd\",\"aw\",@progbits\n" ".align 3\n" "PPC64CompilationCallback:\n" @@ -223,6 +233,7 @@ asm( ".align 4\n" ".type PPC64CompilationCallback,@function\n" ".L.PPC64CompilationCallback:\n" +#endif # else asm( ".text\n" @@ -387,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (TM.getSubtargetImpl()->isDarwinABI()){ + } else if (Subtarget.isDarwinABI()){ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0x91610028); // stw r11, 40(r1) diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 46d4a08eb687..b6b37ffb852b 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -18,32 +18,29 @@ #include "llvm/Target/TargetJITInfo.h" namespace llvm { - class PPCTargetMachine; +class PPCSubtarget; +class PPCJITInfo : public TargetJITInfo { +protected: + PPCSubtarget &Subtarget; + bool is64Bit; - class PPCJITInfo : public TargetJITInfo { - protected: - PPCTargetMachine &TM; - bool is64Bit; - public: - PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) { - useGOT = 0; - is64Bit = tmIs64Bit; - } +public: + PPCJITInfo(PPCSubtarget &STI); - virtual StubLayout getStubLayout(); - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. 
- /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); - }; + StubLayout getStubLayout() override; + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, + unsigned char *GOTBase) override; + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; +}; } #endif diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index f61c8bf0216e..668041371780 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -13,17 +13,21 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCSubtarget.h" #include "MCTargetDesc/PPCMCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { @@ -32,35 +36,42 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ + const TargetMachine &TM = AP.TM; + Mangler *Mang = AP.Mang; + const DataLayout *DL = TM.getDataLayout(); MCContext &Ctx = AP.OutContext; + bool isDarwin = TM.getSubtarget<PPCSubtarget>().isDarwin(); SmallString<128> Name; + StringRef Suffix; + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) { + if (isDarwin) + Suffix = "$stub"; + } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + Suffix = "$non_lazy_ptr"; + + if (!Suffix.empty()) + Name += DL->getPrivateGlobalPrefix(); + + unsigned PrefixLen = Name.size(); + if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Name += AP.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - } else { + Mang->getNameWithPrefix(Name, MO.getSymbolName()); + } else { const GlobalValue *GV = MO.getGlobal(); - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB || - (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)) - isImplicitlyPrivate = true; - - AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + TM.getNameWithPrefix(Name, GV, *Mang); } - + + unsigned OrigLen = Name.size() - PrefixLen; + + Name += Suffix; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); + // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. 
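+  // As a worked example (using a hypothetical call to printf on Darwin):
+  // Name is built as PrivateGlobalPrefix + mangled name + suffix, i.e.
+  // "L" + "_printf" + "$stub" == "L_printf$stub", while OrigName keeps the
+  // plain "_printf" for the stub table entry below.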
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { - Name += "$stub"; - const char *PGP = AP.MAI->getPrivateGlobalPrefix(); - const char *Prefix = ""; - if (!Name.startswith(PGP)) { - // http://llvm.org/bugs/show_bug.cgi?id=15763 - // all stubs and lazy_ptrs should be local symbols, which need leading 'L' - Prefix = PGP; - } - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name)); + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) { MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -72,10 +83,9 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ StubValueTy(AP.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { - Name.erase(Name.end()-5, Name.end()); StubSym = MachineModuleInfoImpl:: - StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false); } return Sym; } @@ -83,16 +93,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, // then add the suffix. if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { - Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - MachineModuleInfoMachO &MachO = getMachOMMI(AP); MachineModuleInfoImpl::StubValueTy &StubSym = (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: StubValueTy(AP.getSymbol(MO.getGlobal()), @@ -101,7 +108,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ return Sym; } - return Ctx.GetOrCreateSymbol(Name.str()); + return Sym; } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, @@ -132,6 +139,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; } + if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) + RefKind = MCSymbolRefExpr::VK_PLT; + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); if (!MO.isJTI() && MO.getOffset()) diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index 6a0aec842be7..9da1b1b5c754 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -8,8 +8,16 @@ //===----------------------------------------------------------------------===// #include "PPCMachineFunctionInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; void PPCFunctionInfo::anchor() { } +MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { + const DataLayout *DL = MF.getTarget().getDataLayout(); + return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ + Twine(MF.getFunctionNumber())+"$poff"); +} diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 33f843dfb432..9a2cec744274 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// 64-bit SVR4 ABI. SmallVector<unsigned, 3> MustSaveCRs; + /// Hold onto our MachineFunction context. 
+ MachineFunction &MF; + + /// Whether this uses the PIC Base register or not. + bool UsesPICBase; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -109,7 +115,9 @@ public: VarArgsStackOffset(0), VarArgsNumGPR(0), VarArgsNumFPR(0), - CRSpillFrameIndex(0) {} + CRSpillFrameIndex(0), + MF(MF), + UsesPICBase(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -170,6 +178,11 @@ public: const SmallVectorImpl<unsigned> & getMustSaveCRs() const { return MustSaveCRs; } void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); } + + void setUsesPICBase(bool uses) { UsesPICBase = uses; } + bool usesPICBase() const { return UsesPICBase; } + + MCSymbol *getPICOffsetSymbol() const; }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 19ccbfcdb169..9895ee6267aa 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reginfo" #include "PPCRegisterInfo.h" #include "PPC.h" #include "PPCFrameLowering.h" @@ -27,7 +26,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -43,11 +41,13 @@ #include "llvm/Target/TargetOptions.h" #include <cstdlib> +using namespace llvm; + +#define DEBUG_TYPE "reginfo" + #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -using namespace llvm; - static cl::opt<bool> EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); @@ -97,7 +97,7 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) return &PPC::GPRCRegClass; } -const uint16_t* +const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? @@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (PPCFI->needsFP(MF)) Reserved.set(PPC::R31); - if (hasBasePointer(MF)) + if (hasBasePointer(MF)) { + if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) + Reserved.set(PPC::R29); + else + Reserved.set(PPC::R30); + } + + if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) Reserved.set(PPC::R30); // Reserve Altivec registers when Altivec is unavailable. @@ -230,12 +239,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::F8RCRegClassID: case PPC::F4RCRegClassID: case PPC::VRRCRegClassID: + case PPC::VFRCRegClassID: + case PPC::VSLRCRegClassID: + case PPC::VSHRCRegClassID: return 32 - DefaultSafety; + case PPC::VSRCRegClassID: + case PPC::VSFRCRegClassID: + return 64 - DefaultSafety; case PPC::CRRCRegClassID: return 8 - DefaultSafety; } } +const TargetRegisterClass* +PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const { + if (Subtarget.hasVSX()) { + // With VSX, we can inflate various sub-register classes to the full VSX + // register set. 
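+    // (The VSX register file overlays the scalar FPRs on vs0-vs31 and the
+    // Altivec VRs on vs32-vs63, so any value already sitting in F8RC or
+    // VRRC also lives in a 64-entry VSX register and inflation is free.)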
+ + if (RC == &PPC::F8RCRegClass) + return &PPC::VSFRCRegClass; + else if (RC == &PPC::VRRCRegClass) + return &PPC::VSRCRegClass; + } + + return TargetRegisterInfo::getLargestLegalSuperClass(RC); +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -452,6 +482,127 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +static unsigned getCRFromCRBit(unsigned SrcReg) { + unsigned Reg = 0; + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) + Reg = PPC::CR0; + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) + Reg = PPC::CR1; + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) + Reg = PPC::CR2; + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) + Reg = PPC::CR3; + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) + Reg = PPC::CR4; + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) + Reg = PPC::CR5; + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) + Reg = PPC::CR6; + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) + Reg = PPC::CR7; + + assert(Reg != 0 && "Invalid CR bit register"); + return Reg; +} + +void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL), + getCRFromCRBit(SrcReg)) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) + .addReg(getCRFromCRBit(SrcReg)); + + // If the saved register wasn't CR0LT, shift the bits left so that the bit to + // store is the first one. Mask all but that bit. + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + // rlwinm rA, rA, ShiftBits, 0, 0. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg)) + .addImm(0).addImm(0); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset> + // Get the instruction's basic block. 
+ MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc dl = MI.getDebugLoc(); + + bool LP64 = Subtarget.isPPC64(); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CRBIT does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); + + unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO) + .addReg(getCRFromCRBit(DestReg)); + + unsigned ShiftBits = getEncodingValue(DestReg); + // rlwimi r11, r10, 32-ShiftBits, ..., ... + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO) + .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill) + .addImm(ShiftBits ? 32-ShiftBits : 0) + .addImm(ShiftBits).addImm(ShiftBits); + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), + getCRFromCRBit(DestReg)) + .addReg(RegO, RegState::Kill) + // Make sure we have a use dependency all the way through this + // sequence of instructions. We can't have the other bits in the CR + // modified in between the mfocrf and the mtocrf. + .addReg(getCRFromCRBit(DestReg), RegState::Implicit); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const { // Get the instruction. @@ -595,6 +746,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if (OpC == PPC::RESTORE_CR) { lowerCRRestore(II, FrameIndex); return; + } else if (OpC == PPC::SPILL_CRBIT) { + lowerCRBitSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CRBIT) { + lowerCRBitRestore(II, FrameIndex); + return; } else if (OpC == PPC::SPILL_VRSAVE) { lowerVRSAVESpilling(II, FrameIndex); return; @@ -695,7 +852,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const { if (!hasBasePointer(MF)) return getFrameRegister(MF); - return Subtarget.isPPC64() ? PPC::X30 : PPC::R30; + if (Subtarget.isPPC64()) + return PPC::X30; + + if (Subtarget.isSVR4ABI() && + MF.getTarget().getRelocationModel() == Reloc::PIC_) + return PPC::R29; + + return PPC::R30; } bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { @@ -737,16 +901,6 @@ bool PPCRegisterInfo:: needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { assert(Offset < 0 && "Local offset must be negative"); - unsigned FIOperandNum = 0; - while (!MI->getOperand(FIOperandNum).isFI()) { - ++FIOperandNum; - assert(FIOperandNum < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); - Offset += MI->getOperand(OffsetOperandNo).getImm(); - // It's the load/store FI references that cause issues, as it can be difficult // to materialize the offset if it won't fit in the literal field. 
Estimate // based on the size of the local frame and some conservative assumptions @@ -812,11 +966,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, .addFrameIndex(FrameIdx).addImm(Offset); } -void -PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const { - MachineInstr &MI = *I; - +void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { unsigned FIOperandNum = 0; while (!MI.getOperand(FIOperandNum).isFI()) { ++FIOperandNum; @@ -828,10 +979,28 @@ PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); Offset += MI.getOperand(OffsetOperandNo).getImm(); MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const MCInstrDesc &MCID = MI.getDesc(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.constrainRegClass(BaseReg, + TII.getRegClass(MCID, FIOperandNum, this, MF)); } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { + unsigned FIOperandNum = 0; + while (!MI->getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); + Offset += MI->getOperand(OffsetOperandNo).getImm(); + return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index dd3bb405dac3..13a35f6309d4 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -15,8 +15,8 @@ #ifndef POWERPC32_REGISTERINFO_H #define POWERPC32_REGISTERINFO_H -#include "llvm/ADT/DenseMap.h" #include "PPC.h" +#include "llvm/ADT/DenseMap.h" #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" @@ -34,33 +34,37 @@ public: /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. - virtual const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; + MachineFunction &MF) const override; + + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; /// We require the register scavenger. 
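   /// (eliminateFrameIndex creates virtual registers when expanding the CR
   /// and VRSAVE spill pseudos and when materializing large offsets; the
   /// scavenger assigns physical registers to them afterwards.)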
- bool requiresRegisterScavenging(const MachineFunction &MF) const { + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } - bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { return true; } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } - virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const { + bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override { return true; } @@ -69,34 +73,39 @@ public: unsigned FrameIndex) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; + void lowerCRBitSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRBitRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, - int &FrameIdx) const; + int &FrameIdx) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; // Support for virtual base registers. - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, - int64_t Offset) const; - void resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const; - bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + int64_t Offset) const override; + void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const override; + bool isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const override; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; // Base pointer (stack realignment) support. unsigned getBaseRegister(const MachineFunction &MF) const; bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 43663ce013e9..b3d145b2cc49 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>; def sub_eq : SubRegIndex<1, 2>; def sub_un : SubRegIndex<1, 3>; def sub_32 : SubRegIndex<32>; +def sub_64 : SubRegIndex<64>; +def sub_128 : SubRegIndex<128>; } @@ -47,9 +49,36 @@ class FPR<bits<5> num, string n> : PPCReg<n> { let HWEncoding{4-0} = num; } -// VR - One of the 32 128-bit vector registers -class VR<bits<5> num, string n> : PPCReg<n> { +// VF - One of the 32 64-bit floating-point subregisters of the vector +// registers (used by VSX). 
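+// (For example, VF5 prints as "vs37": HWEncoding bit 5 being set marks it
+// as the scalar half of the upper VSX bank, overlapping Altivec register
+// V5.)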
+class VF<bits<5> num, string n> : PPCReg<n> {
   let HWEncoding{4-0} = num;
+  let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+  let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+  let HWEncoding{5} = 0;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_64];
+}
+
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+  let HWEncoding = SubReg.HWEncoding;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+  let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+  let HWEncoding{5} = 1;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_128];
 }
 
 // CR - One of the 8 4-bit condition registers
@@ -80,12 +109,27 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }
 
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+  def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
 // Vector registers
 foreach Index = 0-31 in {
-  def V#Index : VR<Index, "v"#Index>,
+  def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 
+// VSX registers
+foreach Index = 0-31 in {
+  def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+                  DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+  def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+                  DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
 // The representation of r0 when treated as the constant 0.
 def ZERO : GPR<0, "0">;
 def ZERO8 : GP8<ZERO, "0">;
@@ -211,17 +255,39 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
                          V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
                          V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
 
-def CRBITRC : RegisterClass<"PPC", [i32], 32,
-  (add CR0LT, CR0GT, CR0EQ, CR0UN,
-       CR1LT, CR1GT, CR1EQ, CR1UN,
-       CR2LT, CR2GT, CR2EQ, CR2UN,
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+                          (add (sequence "VSL%u", 0, 13),
+                               (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+                          (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+                               VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+                               VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+                               VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+                               VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+                         (add VSLRC, VSHRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
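+// (VFRC below covers the 32 upper halves, vs32-vs63; VSFRC is then the
+// union of F8RC and VFRC, so all 64 VSX registers can carry a scalar f64.)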
+def VFRC : RegisterClass<"PPC", [f64], 64, + (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7, + VF8, VF9, VF10, VF11, VF12, VF13, VF14, + VF15, VF16, VF17, VF18, VF19, VF31, VF30, + VF29, VF28, VF27, VF26, VF25, VF24, VF23, + VF22, VF21, VF20)>; +def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>; + +def CRBITRC : RegisterClass<"PPC", [i1], 32, + (add CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT, CR3EQ, CR3UN, CR4LT, CR4GT, CR4EQ, CR4UN, CR5LT, CR5GT, CR5EQ, CR5UN, CR6LT, CR6GT, CR6EQ, CR6UN, - CR7LT, CR7GT, CR7EQ, CR7UN)> -{ - let CopyCost = -1; + CR7LT, CR7GT, CR7EQ, CR7UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR0LT, CR0GT, CR0EQ, CR0UN)> { + let Size = 32; } def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 92ba69c2c6b8..1221d4149996 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -8,114 +8,106 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across PowerPC chips sets -// -def BPU : FuncUnit; // Branch unit -def SLU : FuncUnit; // Store/load unit -def SRU : FuncUnit; // special register unit -def IU1 : FuncUnit; // integer unit 1 (simple) -def IU2 : FuncUnit; // integer unit 2 (complex) -def FPU1 : FuncUnit; // floating point unit 1 -def FPU2 : FuncUnit; // floating point unit 2 -def VPU : FuncUnit; // vector permutation unit -def VIU1 : FuncUnit; // vector integer unit 1 (simple) -def VIU2 : FuncUnit; // vector integer unit 2 (complex) -def VFPU : FuncUnit; // vector floating point unit - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // -def IntSimple : InstrItinClass; -def IntGeneral : InstrItinClass; -def IntCompare : InstrItinClass; -def IntDivD : InstrItinClass; -def IntDivW : InstrItinClass; -def IntMFFS : InstrItinClass; -def IntMFVSCR : InstrItinClass; -def IntMTFSB0 : InstrItinClass; -def IntMTSRD : InstrItinClass; -def IntMulHD : InstrItinClass; -def IntMulHW : InstrItinClass; -def IntMulHWU : InstrItinClass; -def IntMulLI : InstrItinClass; -def IntRFID : InstrItinClass; -def IntRotateD : InstrItinClass; -def IntRotateDI : InstrItinClass; -def IntRotate : InstrItinClass; -def IntShift : InstrItinClass; -def IntTrapD : InstrItinClass; -def IntTrapW : InstrItinClass; -def BrB : InstrItinClass; -def BrCR : InstrItinClass; -def BrMCR : InstrItinClass; -def BrMCRX : InstrItinClass; -def LdStDCBA : InstrItinClass; -def LdStDCBF : InstrItinClass; -def LdStDCBI : InstrItinClass; -def LdStLoad : InstrItinClass; -def LdStLoadUpd : InstrItinClass; -def LdStStore : InstrItinClass; -def LdStStoreUpd : InstrItinClass; -def LdStDSS : InstrItinClass; -def LdStICBI : InstrItinClass; -def LdStLD : InstrItinClass; -def LdStLDU : InstrItinClass; -def LdStLDARX : InstrItinClass; -def LdStLFD : InstrItinClass; -def LdStLFDU : InstrItinClass; -def LdStLHA : InstrItinClass; -def LdStLHAU : InstrItinClass; -def LdStLMW : InstrItinClass; -def LdStLVecX : InstrItinClass; -def LdStLWA : InstrItinClass; -def LdStLWARX : InstrItinClass; -def LdStSLBIA : InstrItinClass; -def LdStSLBIE : InstrItinClass; -def LdStSTD : InstrItinClass; -def LdStSTDCX : InstrItinClass; -def LdStSTDU : InstrItinClass; -def LdStSTFD : InstrItinClass; -def LdStSTFDU : InstrItinClass; -def LdStSTVEBX : InstrItinClass; -def LdStSTWCX : InstrItinClass; -def LdStSync : 
InstrItinClass; -def SprISYNC : InstrItinClass; -def SprMFSR : InstrItinClass; -def SprMTMSR : InstrItinClass; -def SprMTSR : InstrItinClass; -def SprTLBSYNC : InstrItinClass; -def SprMFCR : InstrItinClass; -def SprMFMSR : InstrItinClass; -def SprMFSPR : InstrItinClass; -def SprMFTB : InstrItinClass; -def SprMTSPR : InstrItinClass; -def SprMTSRIN : InstrItinClass; -def SprRFI : InstrItinClass; -def SprSC : InstrItinClass; -def FPGeneral : InstrItinClass; -def FPAddSub : InstrItinClass; -def FPCompare : InstrItinClass; -def FPDivD : InstrItinClass; -def FPDivS : InstrItinClass; -def FPFused : InstrItinClass; -def FPRes : InstrItinClass; -def FPSqrt : InstrItinClass; -def VecGeneral : InstrItinClass; -def VecFP : InstrItinClass; -def VecFPCompare : InstrItinClass; -def VecComplex : InstrItinClass; -def VecPerm : InstrItinClass; -def VecFPRound : InstrItinClass; -def VecVSL : InstrItinClass; -def VecVSR : InstrItinClass; -def SprMTMSRD : InstrItinClass; -def SprSLIE : InstrItinClass; -def SprSLBIE : InstrItinClass; -def SprSLBMTE : InstrItinClass; -def SprSLBMFEE : InstrItinClass; -def SprSLBIA : InstrItinClass; -def SprTLBIEL : InstrItinClass; -def SprTLBIE : InstrItinClass; +def IIC_IntSimple : InstrItinClass; +def IIC_IntGeneral : InstrItinClass; +def IIC_IntCompare : InstrItinClass; +def IIC_IntDivD : InstrItinClass; +def IIC_IntDivW : InstrItinClass; +def IIC_IntMFFS : InstrItinClass; +def IIC_IntMFVSCR : InstrItinClass; +def IIC_IntMTFSB0 : InstrItinClass; +def IIC_IntMTSRD : InstrItinClass; +def IIC_IntMulHD : InstrItinClass; +def IIC_IntMulHW : InstrItinClass; +def IIC_IntMulHWU : InstrItinClass; +def IIC_IntMulLI : InstrItinClass; +def IIC_IntRFID : InstrItinClass; +def IIC_IntRotateD : InstrItinClass; +def IIC_IntRotateDI : InstrItinClass; +def IIC_IntRotate : InstrItinClass; +def IIC_IntShift : InstrItinClass; +def IIC_IntTrapD : InstrItinClass; +def IIC_IntTrapW : InstrItinClass; +def IIC_BrB : InstrItinClass; +def IIC_BrCR : InstrItinClass; +def IIC_BrMCR : InstrItinClass; +def IIC_BrMCRX : InstrItinClass; +def IIC_LdStDCBA : InstrItinClass; +def IIC_LdStDCBF : InstrItinClass; +def IIC_LdStDCBI : InstrItinClass; +def IIC_LdStLoad : InstrItinClass; +def IIC_LdStLoadUpd : InstrItinClass; +def IIC_LdStLoadUpdX : InstrItinClass; +def IIC_LdStStore : InstrItinClass; +def IIC_LdStStoreUpd : InstrItinClass; +def IIC_LdStDSS : InstrItinClass; +def IIC_LdStICBI : InstrItinClass; +def IIC_LdStLD : InstrItinClass; +def IIC_LdStLDU : InstrItinClass; +def IIC_LdStLDUX : InstrItinClass; +def IIC_LdStLDARX : InstrItinClass; +def IIC_LdStLFD : InstrItinClass; +def IIC_LdStLFDU : InstrItinClass; +def IIC_LdStLFDUX : InstrItinClass; +def IIC_LdStLHA : InstrItinClass; +def IIC_LdStLHAU : InstrItinClass; +def IIC_LdStLHAUX : InstrItinClass; +def IIC_LdStLMW : InstrItinClass; +def IIC_LdStLVecX : InstrItinClass; +def IIC_LdStLWA : InstrItinClass; +def IIC_LdStLWARX : InstrItinClass; +def IIC_LdStSLBIA : InstrItinClass; +def IIC_LdStSLBIE : InstrItinClass; +def IIC_LdStSTD : InstrItinClass; +def IIC_LdStSTDCX : InstrItinClass; +def IIC_LdStSTDU : InstrItinClass; +def IIC_LdStSTDUX : InstrItinClass; +def IIC_LdStSTFD : InstrItinClass; +def IIC_LdStSTFDU : InstrItinClass; +def IIC_LdStSTVEBX : InstrItinClass; +def IIC_LdStSTWCX : InstrItinClass; +def IIC_LdStSync : InstrItinClass; +def IIC_SprISYNC : InstrItinClass; +def IIC_SprMFSR : InstrItinClass; +def IIC_SprMTMSR : InstrItinClass; +def IIC_SprMTSR : InstrItinClass; +def IIC_SprTLBSYNC : InstrItinClass; +def IIC_SprMFCR : InstrItinClass; +def 
IIC_SprMFCRF : InstrItinClass; +def IIC_SprMFMSR : InstrItinClass; +def IIC_SprMFSPR : InstrItinClass; +def IIC_SprMFTB : InstrItinClass; +def IIC_SprMTSPR : InstrItinClass; +def IIC_SprMTSRIN : InstrItinClass; +def IIC_SprRFI : InstrItinClass; +def IIC_SprSC : InstrItinClass; +def IIC_FPGeneral : InstrItinClass; +def IIC_FPAddSub : InstrItinClass; +def IIC_FPCompare : InstrItinClass; +def IIC_FPDivD : InstrItinClass; +def IIC_FPDivS : InstrItinClass; +def IIC_FPFused : InstrItinClass; +def IIC_FPRes : InstrItinClass; +def IIC_FPSqrtD : InstrItinClass; +def IIC_FPSqrtS : InstrItinClass; +def IIC_VecGeneral : InstrItinClass; +def IIC_VecFP : InstrItinClass; +def IIC_VecFPCompare : InstrItinClass; +def IIC_VecComplex : InstrItinClass; +def IIC_VecPerm : InstrItinClass; +def IIC_VecFPRound : InstrItinClass; +def IIC_VecVSL : InstrItinClass; +def IIC_VecVSR : InstrItinClass; +def IIC_SprMTMSRD : InstrItinClass; +def IIC_SprSLIE : InstrItinClass; +def IIC_SprSLBIE : InstrItinClass; +def IIC_SprSLBMTE : InstrItinClass; +def IIC_SprSLBMFEE : InstrItinClass; +def IIC_SprSLBIA : InstrItinClass; +def IIC_SprTLBIEL : InstrItinClass; +def IIC_SprTLBIE : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. @@ -125,6 +117,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleP7.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" @@ -136,392 +129,392 @@ include "PPCScheduleE5500.td" // // opcode itinerary class // ====== =============== -// add IntSimple -// addc IntGeneral -// adde IntGeneral -// addi IntSimple -// addic IntGeneral -// addic. IntGeneral -// addis IntSimple -// addme IntGeneral -// addze IntGeneral -// and IntSimple -// andc IntSimple -// andi. IntGeneral -// andis. 
IntGeneral -// b BrB -// bc BrB -// bcctr BrB -// bclr BrB -// cmp IntCompare -// cmpi IntCompare -// cmpl IntCompare -// cmpli IntCompare -// cntlzd IntRotateD -// cntlzw IntGeneral -// crand BrCR -// crandc BrCR -// creqv BrCR -// crnand BrCR -// crnor BrCR -// cror BrCR -// crorc BrCR -// crxor BrCR -// dcba LdStDCBA -// dcbf LdStDCBF -// dcbi LdStDCBI -// dcbst LdStDCBF -// dcbt LdStLoad -// dcbtst LdStLoad -// dcbz LdStDCBF -// divd IntDivD -// divdu IntDivD -// divw IntDivW -// divwu IntDivW -// dss LdStDSS -// dst LdStDSS -// dstst LdStDSS -// eciwx LdStLoad -// ecowx LdStLoad -// eieio LdStLoad -// eqv IntSimple -// extsb IntSimple -// extsh IntSimple -// extsw IntSimple -// fabs FPGeneral -// fadd FPAddSub -// fadds FPGeneral -// fcfid FPGeneral -// fcmpo FPCompare -// fcmpu FPCompare -// fctid FPGeneral -// fctidz FPGeneral -// fctiw FPGeneral -// fctiwz FPGeneral -// fdiv FPDivD -// fdivs FPDivS -// fmadd FPFused -// fmadds FPGeneral -// fmr FPGeneral -// fmsub FPFused -// fmsubs FPGeneral -// fmul FPFused -// fmuls FPGeneral -// fnabs FPGeneral -// fneg FPGeneral -// fnmadd FPFused -// fnmadds FPGeneral -// fnmsub FPFused -// fnmsubs FPGeneral -// fres FPRes -// frsp FPGeneral -// frsqrte FPGeneral -// fsel FPGeneral -// fsqrt FPSqrt -// fsqrts FPSqrt -// fsub FPAddSub -// fsubs FPGeneral -// icbi LdStICBI -// isync SprISYNC -// lbz LdStLoad -// lbzu LdStLoadUpd -// lbzux LdStLoadUpd -// lbzx LdStLoad -// ld LdStLD -// ldarx LdStLDARX -// ldu LdStLDU -// ldux LdStLDU -// ldx LdStLD -// lfd LdStLFD -// lfdu LdStLFDU -// lfdux LdStLFDU -// lfdx LdStLFD -// lfs LdStLFD -// lfsu LdStLFDU -// lfsux LdStLFDU -// lfsx LdStLFD -// lha LdStLHA -// lhau LdStLHAU -// lhaux LdStLHAU -// lhax LdStLHA -// lhbrx LdStLoad -// lhz LdStLoad -// lhzu LdStLoadUpd -// lhzux LdStLoadUpd -// lhzx LdStLoad -// lmw LdStLMW -// lswi LdStLMW -// lswx LdStLMW -// lvebx LdStLVecX -// lvehx LdStLVecX -// lvewx LdStLVecX -// lvsl LdStLVecX -// lvsr LdStLVecX -// lvx LdStLVecX -// lvxl LdStLVecX -// lwa LdStLWA -// lwarx LdStLWARX -// lwaux LdStLHAU -// lwax LdStLHA -// lwbrx LdStLoad -// lwz LdStLoad -// lwzu LdStLoadUpd -// lwzux LdStLoadUpd -// lwzx LdStLoad -// mcrf BrMCR -// mcrfs FPGeneral -// mcrxr BrMCRX -// mfcr SprMFCR -// mffs IntMFFS -// mfmsr SprMFMSR -// mfspr SprMFSPR -// mfsr SprMFSR -// mfsrin SprMFSR -// mftb SprMFTB -// mfvscr IntMFVSCR -// mtcrf BrMCRX -// mtfsb0 IntMTFSB0 -// mtfsb1 IntMTFSB0 -// mtfsf IntMTFSB0 -// mtfsfi IntMTFSB0 -// mtmsr SprMTMSR -// mtmsrd LdStLD -// mtspr SprMTSPR -// mtsr SprMTSR -// mtsrd IntMTSRD -// mtsrdin IntMTSRD -// mtsrin SprMTSRIN -// mtvscr IntMFVSCR -// mulhd IntMulHD -// mulhdu IntMulHD -// mulhw IntMulHW -// mulhwu IntMulHWU -// mulld IntMulHD -// mulli IntMulLI -// mullw IntMulHW -// nand IntSimple -// neg IntSimple -// nor IntSimple -// or IntSimple -// orc IntSimple -// ori IntSimple -// oris IntSimple -// rfi SprRFI -// rfid IntRFID -// rldcl IntRotateD -// rldcr IntRotateD -// rldic IntRotateDI -// rldicl IntRotateDI -// rldicr IntRotateDI -// rldimi IntRotateDI -// rlwimi IntRotate -// rlwinm IntGeneral -// rlwnm IntGeneral -// sc SprSC -// slbia LdStSLBIA -// slbie LdStSLBIE -// sld IntRotateD -// slw IntGeneral -// srad IntRotateD -// sradi IntRotateDI -// sraw IntShift -// srawi IntShift -// srd IntRotateD -// srw IntGeneral -// stb LdStStore -// stbu LdStStoreUpd -// stbux LdStStoreUpd -// stbx LdStStore -// std LdStSTD -// stdcx. 
LdStSTDCX -// stdu LdStSTDU -// stdux LdStSTDU -// stdx LdStSTD -// stfd LdStSTFD -// stfdu LdStSTFDU -// stfdux LdStSTFDU -// stfdx LdStSTFD -// stfiwx LdStSTFD -// stfs LdStSTFD -// stfsu LdStSTFDU -// stfsux LdStSTFDU -// stfsx LdStSTFD -// sth LdStStore -// sthbrx LdStStore -// sthu LdStStoreUpd -// sthux LdStStoreUpd -// sthx LdStStore -// stmw LdStLMW -// stswi LdStLMW -// stswx LdStLMW -// stvebx LdStSTVEBX -// stvehx LdStSTVEBX -// stvewx LdStSTVEBX -// stvx LdStSTVEBX -// stvxl LdStSTVEBX -// stw LdStStore -// stwbrx LdStStore -// stwcx. LdStSTWCX -// stwu LdStStoreUpd -// stwux LdStStoreUpd -// stwx LdStStore -// subf IntGeneral -// subfc IntGeneral -// subfe IntGeneral -// subfic IntGeneral -// subfme IntGeneral -// subfze IntGeneral -// sync LdStSync -// td IntTrapD -// tdi IntTrapD -// tlbia LdStSLBIA -// tlbie LdStDCBF -// tlbsync SprTLBSYNC -// tw IntTrapW -// twi IntTrapW -// vaddcuw VecGeneral -// vaddfp VecFP -// vaddsbs VecGeneral -// vaddshs VecGeneral -// vaddsws VecGeneral -// vaddubm VecGeneral -// vaddubs VecGeneral -// vadduhm VecGeneral -// vadduhs VecGeneral -// vadduwm VecGeneral -// vadduws VecGeneral -// vand VecGeneral -// vandc VecGeneral -// vavgsb VecGeneral -// vavgsh VecGeneral -// vavgsw VecGeneral -// vavgub VecGeneral -// vavguh VecGeneral -// vavguw VecGeneral -// vcfsx VecFP -// vcfux VecFP -// vcmpbfp VecFPCompare -// vcmpeqfp VecFPCompare -// vcmpequb VecGeneral -// vcmpequh VecGeneral -// vcmpequw VecGeneral -// vcmpgefp VecFPCompare -// vcmpgtfp VecFPCompare -// vcmpgtsb VecGeneral -// vcmpgtsh VecGeneral -// vcmpgtsw VecGeneral -// vcmpgtub VecGeneral -// vcmpgtuh VecGeneral -// vcmpgtuw VecGeneral -// vctsxs VecFP -// vctuxs VecFP -// vexptefp VecFP -// vlogefp VecFP -// vmaddfp VecFP -// vmaxfp VecFPCompare -// vmaxsb VecGeneral -// vmaxsh VecGeneral -// vmaxsw VecGeneral -// vmaxub VecGeneral -// vmaxuh VecGeneral -// vmaxuw VecGeneral -// vmhaddshs VecComplex -// vmhraddshs VecComplex -// vminfp VecFPCompare -// vminsb VecGeneral -// vminsh VecGeneral -// vminsw VecGeneral -// vminub VecGeneral -// vminuh VecGeneral -// vminuw VecGeneral -// vmladduhm VecComplex -// vmrghb VecPerm -// vmrghh VecPerm -// vmrghw VecPerm -// vmrglb VecPerm -// vmrglh VecPerm -// vmrglw VecPerm -// vmsubfp VecFP -// vmsummbm VecComplex -// vmsumshm VecComplex -// vmsumshs VecComplex -// vmsumubm VecComplex -// vmsumuhm VecComplex -// vmsumuhs VecComplex -// vmulesb VecComplex -// vmulesh VecComplex -// vmuleub VecComplex -// vmuleuh VecComplex -// vmulosb VecComplex -// vmulosh VecComplex -// vmuloub VecComplex -// vmulouh VecComplex -// vnor VecGeneral -// vor VecGeneral -// vperm VecPerm -// vpkpx VecPerm -// vpkshss VecPerm -// vpkshus VecPerm -// vpkswss VecPerm -// vpkswus VecPerm -// vpkuhum VecPerm -// vpkuhus VecPerm -// vpkuwum VecPerm -// vpkuwus VecPerm -// vrefp VecFPRound -// vrfim VecFPRound -// vrfin VecFPRound -// vrfip VecFPRound -// vrfiz VecFPRound -// vrlb VecGeneral -// vrlh VecGeneral -// vrlw VecGeneral -// vrsqrtefp VecFP -// vsel VecGeneral -// vsl VecVSL -// vslb VecGeneral -// vsldoi VecPerm -// vslh VecGeneral -// vslo VecPerm -// vslw VecGeneral -// vspltb VecPerm -// vsplth VecPerm -// vspltisb VecPerm -// vspltish VecPerm -// vspltisw VecPerm -// vspltw VecPerm -// vsr VecVSR -// vsrab VecGeneral -// vsrah VecGeneral -// vsraw VecGeneral -// vsrb VecGeneral -// vsrh VecGeneral -// vsro VecPerm -// vsrw VecGeneral -// vsubcuw VecGeneral -// vsubfp VecFP -// vsubsbs VecGeneral -// vsubshs VecGeneral -// vsubsws VecGeneral -// 
vsububm VecGeneral -// vsububs VecGeneral -// vsubuhm VecGeneral -// vsubuhs VecGeneral -// vsubuwm VecGeneral -// vsubuws VecGeneral -// vsum2sws VecComplex -// vsum4sbs VecComplex -// vsum4shs VecComplex -// vsum4ubs VecComplex -// vsumsws VecComplex -// vupkhpx VecPerm -// vupkhsb VecPerm -// vupkhsh VecPerm -// vupklpx VecPerm -// vupklsb VecPerm -// vupklsh VecPerm -// vxor VecGeneral -// xor IntSimple -// xori IntSimple -// xoris IntSimple +// add IIC_IntSimple +// addc IIC_IntGeneral +// adde IIC_IntGeneral +// addi IIC_IntSimple +// addic IIC_IntGeneral +// addic. IIC_IntGeneral +// addis IIC_IntSimple +// addme IIC_IntGeneral +// addze IIC_IntGeneral +// and IIC_IntSimple +// andc IIC_IntSimple +// andi. IIC_IntGeneral +// andis. IIC_IntGeneral +// b IIC_BrB +// bc IIC_BrB +// bcctr IIC_BrB +// bclr IIC_BrB +// cmp IIC_IntCompare +// cmpi IIC_IntCompare +// cmpl IIC_IntCompare +// cmpli IIC_IntCompare +// cntlzd IIC_IntRotateD +// cntlzw IIC_IntGeneral +// crand IIC_BrCR +// crandc IIC_BrCR +// creqv IIC_BrCR +// crnand IIC_BrCR +// crnor IIC_BrCR +// cror IIC_BrCR +// crorc IIC_BrCR +// crxor IIC_BrCR +// dcba IIC_LdStDCBA +// dcbf IIC_LdStDCBF +// dcbi IIC_LdStDCBI +// dcbst IIC_LdStDCBF +// dcbt IIC_LdStLoad +// dcbtst IIC_LdStLoad +// dcbz IIC_LdStDCBF +// divd IIC_IntDivD +// divdu IIC_IntDivD +// divw IIC_IntDivW +// divwu IIC_IntDivW +// dss IIC_LdStDSS +// dst IIC_LdStDSS +// dstst IIC_LdStDSS +// eciwx IIC_LdStLoad +// ecowx IIC_LdStLoad +// eieio IIC_LdStLoad +// eqv IIC_IntSimple +// extsb IIC_IntSimple +// extsh IIC_IntSimple +// extsw IIC_IntSimple +// fabs IIC_FPGeneral +// fadd IIC_FPAddSub +// fadds IIC_FPGeneral +// fcfid IIC_FPGeneral +// fcmpo IIC_FPCompare +// fcmpu IIC_FPCompare +// fctid IIC_FPGeneral +// fctidz IIC_FPGeneral +// fctiw IIC_FPGeneral +// fctiwz IIC_FPGeneral +// fdiv IIC_FPDivD +// fdivs IIC_FPDivS +// fmadd IIC_FPFused +// fmadds IIC_FPGeneral +// fmr IIC_FPGeneral +// fmsub IIC_FPFused +// fmsubs IIC_FPGeneral +// fmul IIC_FPFused +// fmuls IIC_FPGeneral +// fnabs IIC_FPGeneral +// fneg IIC_FPGeneral +// fnmadd IIC_FPFused +// fnmadds IIC_FPGeneral +// fnmsub IIC_FPFused +// fnmsubs IIC_FPGeneral +// fres IIC_FPRes +// frsp IIC_FPGeneral +// frsqrte IIC_FPGeneral +// fsel IIC_FPGeneral +// fsqrt IIC_FPSqrtD +// fsqrts IIC_FPSqrtS +// fsub IIC_FPAddSub +// fsubs IIC_FPGeneral +// icbi IIC_LdStICBI +// isync IIC_SprISYNC +// lbz IIC_LdStLoad +// lbzu IIC_LdStLoadUpd +// lbzux IIC_LdStLoadUpdX +// lbzx IIC_LdStLoad +// ld IIC_LdStLD +// ldarx IIC_LdStLDARX +// ldu IIC_LdStLDU +// ldux IIC_LdStLDUX +// ldx IIC_LdStLD +// lfd IIC_LdStLFD +// lfdu IIC_LdStLFDU +// lfdux IIC_LdStLFDUX +// lfdx IIC_LdStLFD +// lfs IIC_LdStLFD +// lfsu IIC_LdStLFDU +// lfsux IIC_LdStLFDUX +// lfsx IIC_LdStLFD +// lha IIC_LdStLHA +// lhau IIC_LdStLHAU +// lhaux IIC_LdStLHAUX +// lhax IIC_LdStLHA +// lhbrx IIC_LdStLoad +// lhz IIC_LdStLoad +// lhzu IIC_LdStLoadUpd +// lhzux IIC_LdStLoadUpdX +// lhzx IIC_LdStLoad +// lmw IIC_LdStLMW +// lswi IIC_LdStLMW +// lswx IIC_LdStLMW +// lvebx IIC_LdStLVecX +// lvehx IIC_LdStLVecX +// lvewx IIC_LdStLVecX +// lvsl IIC_LdStLVecX +// lvsr IIC_LdStLVecX +// lvx IIC_LdStLVecX +// lvxl IIC_LdStLVecX +// lwa IIC_LdStLWA +// lwarx IIC_LdStLWARX +// lwaux IIC_LdStLHAUX +// lwax IIC_LdStLHA +// lwbrx IIC_LdStLoad +// lwz IIC_LdStLoad +// lwzu IIC_LdStLoadUpd +// lwzux IIC_LdStLoadUpdX +// lwzx IIC_LdStLoad +// mcrf IIC_BrMCR +// mcrfs IIC_FPGeneral +// mcrxr IIC_BrMCRX +// mfcr IIC_SprMFCR +// mffs IIC_IntMFFS +// mfmsr IIC_SprMFMSR +// 
mfspr IIC_SprMFSPR +// mfsr IIC_SprMFSR +// mfsrin IIC_SprMFSR +// mftb IIC_SprMFTB +// mfvscr IIC_IntMFVSCR +// mtcrf IIC_BrMCRX +// mtfsb0 IIC_IntMTFSB0 +// mtfsb1 IIC_IntMTFSB0 +// mtfsf IIC_IntMTFSB0 +// mtfsfi IIC_IntMTFSB0 +// mtmsr IIC_SprMTMSR +// mtmsrd IIC_LdStLD +// mtspr IIC_SprMTSPR +// mtsr IIC_SprMTSR +// mtsrd IIC_IntMTSRD +// mtsrdin IIC_IntMTSRD +// mtsrin IIC_SprMTSRIN +// mtvscr IIC_IntMFVSCR +// mulhd IIC_IntMulHD +// mulhdu IIC_IntMulHD +// mulhw IIC_IntMulHW +// mulhwu IIC_IntMulHWU +// mulld IIC_IntMulHD +// mulli IIC_IntMulLI +// mullw IIC_IntMulHW +// nand IIC_IntSimple +// neg IIC_IntSimple +// nor IIC_IntSimple +// or IIC_IntSimple +// orc IIC_IntSimple +// ori IIC_IntSimple +// oris IIC_IntSimple +// rfi IIC_SprRFI +// rfid IIC_IntRFID +// rldcl IIC_IntRotateD +// rldcr IIC_IntRotateD +// rldic IIC_IntRotateDI +// rldicl IIC_IntRotateDI +// rldicr IIC_IntRotateDI +// rldimi IIC_IntRotateDI +// rlwimi IIC_IntRotate +// rlwinm IIC_IntGeneral +// rlwnm IIC_IntGeneral +// sc IIC_SprSC +// slbia IIC_LdStSLBIA +// slbie IIC_LdStSLBIE +// sld IIC_IntRotateD +// slw IIC_IntGeneral +// srad IIC_IntRotateD +// sradi IIC_IntRotateDI +// sraw IIC_IntShift +// srawi IIC_IntShift +// srd IIC_IntRotateD +// srw IIC_IntGeneral +// stb IIC_LdStStore +// stbu IIC_LdStStoreUpd +// stbux IIC_LdStStoreUpd +// stbx IIC_LdStStore +// std IIC_LdStSTD +// stdcx. IIC_LdStSTDCX +// stdu IIC_LdStSTDU +// stdux IIC_LdStSTDUX +// stdx IIC_LdStSTD +// stfd IIC_LdStSTFD +// stfdu IIC_LdStSTFDU +// stfdux IIC_LdStSTFDU +// stfdx IIC_LdStSTFD +// stfiwx IIC_LdStSTFD +// stfs IIC_LdStSTFD +// stfsu IIC_LdStSTFDU +// stfsux IIC_LdStSTFDU +// stfsx IIC_LdStSTFD +// sth IIC_LdStStore +// sthbrx IIC_LdStStore +// sthu IIC_LdStStoreUpd +// sthux IIC_LdStStoreUpd +// sthx IIC_LdStStore +// stmw IIC_LdStLMW +// stswi IIC_LdStLMW +// stswx IIC_LdStLMW +// stvebx IIC_LdStSTVEBX +// stvehx IIC_LdStSTVEBX +// stvewx IIC_LdStSTVEBX +// stvx IIC_LdStSTVEBX +// stvxl IIC_LdStSTVEBX +// stw IIC_LdStStore +// stwbrx IIC_LdStStore +// stwcx. 
IIC_LdStSTWCX +// stwu IIC_LdStStoreUpd +// stwux IIC_LdStStoreUpd +// stwx IIC_LdStStore +// subf IIC_IntGeneral +// subfc IIC_IntGeneral +// subfe IIC_IntGeneral +// subfic IIC_IntGeneral +// subfme IIC_IntGeneral +// subfze IIC_IntGeneral +// sync IIC_LdStSync +// td IIC_IntTrapD +// tdi IIC_IntTrapD +// tlbia IIC_LdStSLBIA +// tlbie IIC_LdStDCBF +// tlbsync IIC_SprTLBSYNC +// tw IIC_IntTrapW +// twi IIC_IntTrapW +// vaddcuw IIC_VecGeneral +// vaddfp IIC_VecFP +// vaddsbs IIC_VecGeneral +// vaddshs IIC_VecGeneral +// vaddsws IIC_VecGeneral +// vaddubm IIC_VecGeneral +// vaddubs IIC_VecGeneral +// vadduhm IIC_VecGeneral +// vadduhs IIC_VecGeneral +// vadduwm IIC_VecGeneral +// vadduws IIC_VecGeneral +// vand IIC_VecGeneral +// vandc IIC_VecGeneral +// vavgsb IIC_VecGeneral +// vavgsh IIC_VecGeneral +// vavgsw IIC_VecGeneral +// vavgub IIC_VecGeneral +// vavguh IIC_VecGeneral +// vavguw IIC_VecGeneral +// vcfsx IIC_VecFP +// vcfux IIC_VecFP +// vcmpbfp IIC_VecFPCompare +// vcmpeqfp IIC_VecFPCompare +// vcmpequb IIC_VecGeneral +// vcmpequh IIC_VecGeneral +// vcmpequw IIC_VecGeneral +// vcmpgefp IIC_VecFPCompare +// vcmpgtfp IIC_VecFPCompare +// vcmpgtsb IIC_VecGeneral +// vcmpgtsh IIC_VecGeneral +// vcmpgtsw IIC_VecGeneral +// vcmpgtub IIC_VecGeneral +// vcmpgtuh IIC_VecGeneral +// vcmpgtuw IIC_VecGeneral +// vctsxs IIC_VecFP +// vctuxs IIC_VecFP +// vexptefp IIC_VecFP +// vlogefp IIC_VecFP +// vmaddfp IIC_VecFP +// vmaxfp IIC_VecFPCompare +// vmaxsb IIC_VecGeneral +// vmaxsh IIC_VecGeneral +// vmaxsw IIC_VecGeneral +// vmaxub IIC_VecGeneral +// vmaxuh IIC_VecGeneral +// vmaxuw IIC_VecGeneral +// vmhaddshs IIC_VecComplex +// vmhraddshs IIC_VecComplex +// vminfp IIC_VecFPCompare +// vminsb IIC_VecGeneral +// vminsh IIC_VecGeneral +// vminsw IIC_VecGeneral +// vminub IIC_VecGeneral +// vminuh IIC_VecGeneral +// vminuw IIC_VecGeneral +// vmladduhm IIC_VecComplex +// vmrghb IIC_VecPerm +// vmrghh IIC_VecPerm +// vmrghw IIC_VecPerm +// vmrglb IIC_VecPerm +// vmrglh IIC_VecPerm +// vmrglw IIC_VecPerm +// vmsubfp IIC_VecFP +// vmsummbm IIC_VecComplex +// vmsumshm IIC_VecComplex +// vmsumshs IIC_VecComplex +// vmsumubm IIC_VecComplex +// vmsumuhm IIC_VecComplex +// vmsumuhs IIC_VecComplex +// vmulesb IIC_VecComplex +// vmulesh IIC_VecComplex +// vmuleub IIC_VecComplex +// vmuleuh IIC_VecComplex +// vmulosb IIC_VecComplex +// vmulosh IIC_VecComplex +// vmuloub IIC_VecComplex +// vmulouh IIC_VecComplex +// vnor IIC_VecGeneral +// vor IIC_VecGeneral +// vperm IIC_VecPerm +// vpkpx IIC_VecPerm +// vpkshss IIC_VecPerm +// vpkshus IIC_VecPerm +// vpkswss IIC_VecPerm +// vpkswus IIC_VecPerm +// vpkuhum IIC_VecPerm +// vpkuhus IIC_VecPerm +// vpkuwum IIC_VecPerm +// vpkuwus IIC_VecPerm +// vrefp IIC_VecFPRound +// vrfim IIC_VecFPRound +// vrfin IIC_VecFPRound +// vrfip IIC_VecFPRound +// vrfiz IIC_VecFPRound +// vrlb IIC_VecGeneral +// vrlh IIC_VecGeneral +// vrlw IIC_VecGeneral +// vrsqrtefp IIC_VecFP +// vsel IIC_VecGeneral +// vsl IIC_VecVSL +// vslb IIC_VecGeneral +// vsldoi IIC_VecPerm +// vslh IIC_VecGeneral +// vslo IIC_VecPerm +// vslw IIC_VecGeneral +// vspltb IIC_VecPerm +// vsplth IIC_VecPerm +// vspltisb IIC_VecPerm +// vspltish IIC_VecPerm +// vspltisw IIC_VecPerm +// vspltw IIC_VecPerm +// vsr IIC_VecVSR +// vsrab IIC_VecGeneral +// vsrah IIC_VecGeneral +// vsraw IIC_VecGeneral +// vsrb IIC_VecGeneral +// vsrh IIC_VecGeneral +// vsro IIC_VecPerm +// vsrw IIC_VecGeneral +// vsubcuw IIC_VecGeneral +// vsubfp IIC_VecFP +// vsubsbs IIC_VecGeneral +// vsubshs IIC_VecGeneral +// vsubsws 
IIC_VecGeneral +// vsububm IIC_VecGeneral +// vsububs IIC_VecGeneral +// vsubuhm IIC_VecGeneral +// vsubuhs IIC_VecGeneral +// vsubuwm IIC_VecGeneral +// vsubuws IIC_VecGeneral +// vsum2sws IIC_VecComplex +// vsum4sbs IIC_VecComplex +// vsum4shs IIC_VecComplex +// vsum4ubs IIC_VecComplex +// vsumsws IIC_VecComplex +// vupkhpx IIC_VecPerm +// vupkhsb IIC_VecPerm +// vupkhsh IIC_VecPerm +// vupklpx IIC_VecPerm +// vupklsb IIC_VecPerm +// vupklsh IIC_VecPerm +// vxor IIC_VecGeneral +// xor IIC_IntSimple +// xori IIC_IntSimple +// xoris IIC_IntSimple // diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 37b6eac10cfe..218fed248a31 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -26,43 +26,39 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC 440/450 chip sets // -def IFTH1 : FuncUnit; // Fetch unit 1 -def IFTH2 : FuncUnit; // Fetch unit 2 -def PDCD1 : FuncUnit; // Decode unit 1 -def PDCD2 : FuncUnit; // Decode unit 2 -def DISS1 : FuncUnit; // Issue unit 1 -def DISS2 : FuncUnit; // Issue unit 2 -def LRACC : FuncUnit; // Register access and dispatch for - // the simple integer (J-pipe) and - // load/store (L-pipe) pipelines -def IRACC : FuncUnit; // Register access and dispatch for - // the complex integer (I-pipe) pipeline -def FRACC : FuncUnit; // Register access and dispatch for - // the floating-point execution (F-pipe) pipeline -def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline -def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline -def IWB : FuncUnit; // Write-back unit for the I pipeline -def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline -def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline -def JWB : FuncUnit; // Write-back unit for the J pipeline -def AGEN : FuncUnit; // Address generation for the L pipeline -def CRD : FuncUnit; // D-cache access for the L pipeline -def LWB : FuncUnit; // Write-back unit for the L pipeline -def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline -def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline -def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline -def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline -def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline -def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline -def FWB : FuncUnit; // Write-back unit for the F pipeline +def P440_DISS1 : FuncUnit; // Issue unit 1 +def P440_DISS2 : FuncUnit; // Issue unit 2 +def P440_LRACC : FuncUnit; // Register access and dispatch for + // the simple integer (J-pipe) and + // load/store (L-pipe) pipelines +def P440_IRACC : FuncUnit; // Register access and dispatch for + // the complex integer (I-pipe) pipeline +def P440_FRACC : FuncUnit; // Register access and dispatch for + // the floating-point execution (F-pipe) pipeline +def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline +def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline +def P440_IWB : FuncUnit; // Write-back unit for the I pipeline +def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline +def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline +def P440_JWB : FuncUnit; // Write-back unit for the J pipeline +def P440_AGEN : FuncUnit; // Address generation for the L pipeline +def P440_CRD : FuncUnit; // D-cache access for the L pipeline +def P440_LWB : FuncUnit; // Write-back unit for the L pipeline +def P440_FEXE1 : 
FuncUnit; // Execution stage 1 for the F pipeline +def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline +def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline +def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline +def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline +def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline +def P440_FWB : FuncUnit; // Write-back unit for the F pipeline -def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used - // to make sure that no lwarx/stwcx. - // instructions are issued while another - // lwarx/stwcx. is in the L pipe. +def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used + // to make sure that no lwarx/stwcx. + // instructions are issued while another + // lwarx/stwcx. is in the L pipe. -def GPR_Bypass : Bypass; // The bypass for general-purpose regs. -def FPR_Bypass : Bypass; // The bypass for floating-point regs. +def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs. +def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs. // Notes: // Instructions are held in the FRACC, LRACC and IRACC pipeline @@ -104,560 +100,500 @@ def FPR_Bypass : Bypass; // The bypass for floating-point regs. def PPC440Itineraries : ProcessorItineraries< - [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC, - IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, - FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], - [GPR_Bypass, FPR_Bypass], [ - InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<33, [IWB]>], - [40, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, 
GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - 
InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], 
- [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<3, [AGEN], 1>, - InstrStage<2, [CRD], 1>, - InstrStage<1, [LWB]>]>, - InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC], 0>, - InstrStage<1, [LRACC], 0>, - InstrStage<1, [IRACC]>, - InstrStage<1, [FEXE1], 0>, - InstrStage<1, [AGEN], 0>, - InstrStage<1, [JEXE1], 0>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [FEXE2], 0>, - InstrStage<1, [CRD], 0>, - InstrStage<1, [JEXE2], 0>, - InstrStage<1, [IEXE2]>, - InstrStage<6, [FEXE3], 0>, - InstrStage<6, [LWB], 0>, - InstrStage<6, [JWB], 0>, - InstrStage<6, [IWB]>]>, - InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [9, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>]>, - InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - 
InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<25, [FWB]>], - [35, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<13, [FWB]>], - [23, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, 
[FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4], - [FPR_Bypass, FPR_Bypass]> + [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2, + P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD, + P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5, + P440_FEXE6, P440_FWB, P440_LWARX_Hold], + [P440_GPR_Bypass, P440_FPR_Bypass], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<33, [P440_IWB]>], + [36, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + 
P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStICBI, 
[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + 
InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<3, [P440_AGEN], 1>, + InstrStage<2, [P440_CRD], 1>, + InstrStage<1, [P440_LWB]>]>, + InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC], 0>, + InstrStage<1, [P440_LRACC], 0>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_FEXE1], 0>, + InstrStage<1, [P440_AGEN], 0>, + InstrStage<1, [P440_JEXE1], 0>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_FEXE2], 0>, + InstrStage<1, [P440_CRD], 0>, + InstrStage<1, [P440_JEXE2], 0>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<6, [P440_FEXE3], 0>, + InstrStage<6, [P440_LWB], 0>, + InstrStage<6, [P440_JWB], 0>, + InstrStage<6, [P440_IWB]>]>, + InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [5, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + 
InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<25, [P440_FWB]>], + [31, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<13, [P440_FWB]>], + [19, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0], + [P440_FPR_Bypass, P440_FPR_Bypass]> ]>; + +// ===---------------------------------------------------------------------===// +// PPC440 machine model for scheduling and other instruction cost heuristics. + +def PPC440Model : SchedMachineModel { + let IssueWidth = 2; // 2 instructions are dispatched per cycle. 
+  let MinLatency = -1;   // OperandCycles are interpreted as MinLatency.
+  let LoadLatency = 5;   // Optimistic load latency assuming bypass.
+                         // This is overridden by OperandCycles if the
+                         // Itineraries are queried instead.
+
+  let Itineraries = PPC440Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 1612cd2a0b84..14476963bad0 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,8 +14,8 @@
 //===----------------------------------------------------------------------===//
 // Functional units on the PowerPC A2 chip sets
 //
-def XU    : FuncUnit; // XU pipeline
-def FU    : FuncUnit; // FI pipeline
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FI pipeline
 
 //
 // This file defines the itinerary class data for the PPC A2 processor.
@@ -24,126 +24,140 @@ def FU : FuncUnit; // FI pipeline
 
 def PPCA2Itineraries : ProcessorItineraries<
-  [XU, FU], [], [
-  InstrItinData<IntSimple   , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<IntGeneral  , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntCompare  , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntDivW     , [InstrStage<1, [XU]>],
-                              [39, 1, 1]>,
-  InstrItinData<IntDivD     , [InstrStage<1, [XU]>],
-                              [71, 1, 1]>,
-  InstrItinData<IntMulHW    , [InstrStage<1, [XU]>],
-                              [5, 1, 1]>,
-  InstrItinData<IntMulHWU   , [InstrStage<1, [XU]>],
-                              [5, 1, 1]>,
-  InstrItinData<IntMulLI    , [InstrStage<1, [XU]>],
-                              [6, 1, 1]>,
-  InstrItinData<IntRotate   , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntRotateD  , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntShift    , [InstrStage<1, [XU]>],
-                              [2, 1, 1]>,
-  InstrItinData<IntTrapW    , [InstrStage<1, [XU]>],
-                              [2, 1]>,
-  InstrItinData<IntTrapD    , [InstrStage<1, [XU]>],
-                              [2, 1]>,
-  InstrItinData<BrB         , [InstrStage<1, [XU]>],
-                              [6, 1, 1]>,
-  InstrItinData<BrCR        , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<BrMCR       , [InstrStage<1, [XU]>],
-                              [5, 1, 1]>,
-  InstrItinData<BrMCRX      , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStDCBA    , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStDCBF    , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStDCBI    , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStLoad    , [InstrStage<1, [XU]>],
-                              [6, 1, 1]>,
-  InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
-                              [6, 8, 1, 1]>,
-  InstrItinData<LdStLDU     , [InstrStage<1, [XU]>],
-                              [6, 1, 1]>,
-  InstrItinData<LdStStore   , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
-                              [2, 1, 1, 1]>,
-  InstrItinData<LdStICBI,     [InstrStage<1, [XU]>],
-                              [16, 1, 1]>,
-  InstrItinData<LdStSTFD    , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStSTFDU   , [InstrStage<1, [XU]>],
-                              [2, 1, 1, 1]>,
-  InstrItinData<LdStLFD     , [InstrStage<1, [XU]>],
-                              [7, 1, 1]>,
-  InstrItinData<LdStLFDU    , [InstrStage<1, [XU]>],
-                              [7, 9, 1, 1]>,
-  InstrItinData<LdStLHA     , [InstrStage<1, [XU]>],
-                              [6, 1, 1]>,
-  InstrItinData<LdStLHAU    , [InstrStage<1, [XU]>],
-                              [6, 8, 1, 1]>,
-  InstrItinData<LdStLWARX   , [InstrStage<1, [XU]>],
-                              [82, 1, 1]>, // L2 latency
-  InstrItinData<LdStSTD     , [InstrStage<1, [XU]>],
-                              [1, 1, 1]>,
-  InstrItinData<LdStSTDU    , [InstrStage<1, [XU]>],
-                              [2, 1, 1, 1]>,
-  InstrItinData<LdStSTDCX   , [InstrStage<1, [XU]>],
-                              [82, 1, 1]>, // L2 latency
-  InstrItinData<LdStSTWCX   , [InstrStage<1, [XU]>],
-                              [82, 1, 1]>, // L2 latency
-  InstrItinData<LdStSync    , [InstrStage<1, [XU]>],
-                              [6]>,
-  InstrItinData<SprISYNC    ,
[InstrStage<1, [XU]>], - [16]>, - InstrItinData<SprMTMSR , [InstrStage<1, [XU]>], - [16, 1]>, - InstrItinData<SprMFCR , [InstrStage<1, [XU]>], - [6, 1]>, - InstrItinData<SprMFMSR , [InstrStage<1, [XU]>], - [4, 1]>, - InstrItinData<SprMFSPR , [InstrStage<1, [XU]>], - [6, 1]>, - InstrItinData<SprMFTB , [InstrStage<1, [XU]>], - [4, 1]>, - InstrItinData<SprMTSPR , [InstrStage<1, [XU]>], - [6, 1]>, - InstrItinData<SprRFI , [InstrStage<1, [XU]>], - [16]>, - InstrItinData<SprSC , [InstrStage<1, [XU]>], - [16]>, - InstrItinData<FPGeneral , [InstrStage<1, [FU]>], - [6, 1, 1]>, - InstrItinData<FPAddSub , [InstrStage<1, [FU]>], - [6, 1, 1]>, - InstrItinData<FPCompare , [InstrStage<1, [FU]>], - [5, 1, 1]>, - InstrItinData<FPDivD , [InstrStage<1, [FU]>], - [72, 1, 1]>, - InstrItinData<FPDivS , [InstrStage<1, [FU]>], - [59, 1, 1]>, - InstrItinData<FPSqrt , [InstrStage<1, [FU]>], - [69, 1, 1]>, - InstrItinData<FPFused , [InstrStage<1, [FU]>], - [6, 1, 1, 1]>, - InstrItinData<FPRes , [InstrStage<1, [FU]>], - [6, 1]> + [A2_XU, A2_FU], [], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>], + [39, 0, 0]>, + InstrItinData<IIC_IntDivD, [InstrStage<1, [A2_XU]>], + [71, 0, 0]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [A2_XU]>], + [5, 0, 0]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [A2_XU]>], + [5, 0, 0]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntRotateD, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [A2_XU]>], + [2, 0, 0]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [A2_XU]>], + [2, 0]>, + InstrItinData<IIC_IntTrapD, [InstrStage<1, [A2_XU]>], + [2, 0]>, + InstrItinData<IIC_BrB, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [A2_XU]>], + [5, 0, 0]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [A2_XU]>], + [1, 0, 0]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>], + [6, 8, 0, 0]>, + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>], + [6, 8, 0, 0]>, + InstrItinData<IIC_LdStLDU, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_LdStLDUX, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>], + [0, 0, 0]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>], + [2, 0, 0, 0]>, + InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>], + [16, 0, 0]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>], + [0, 0, 0]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>], + [2, 0, 0, 0]>, + InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>], + [7, 0, 0]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>], + [7, 9, 0, 0]>, + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>], + [7, 9, 0, 0]>, + InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, 
[A2_XU]>],
+                                 [6, 8, 0, 0]>,
+  InstrItinData<IIC_LdStLHAUX,  [InstrStage<1, [A2_XU]>],
+                                 [6, 8, 0, 0]>,
+  InstrItinData<IIC_LdStLWARX,  [InstrStage<1, [A2_XU]>],
+                                 [82, 0, 0]>, // L2 latency
+  InstrItinData<IIC_LdStSTD,    [InstrStage<1, [A2_XU]>],
+                                 [0, 0, 0]>,
+  InstrItinData<IIC_LdStSTDU,   [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0, 0]>,
+  InstrItinData<IIC_LdStSTDUX,  [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0, 0]>,
+  InstrItinData<IIC_LdStSTDCX,  [InstrStage<1, [A2_XU]>],
+                                 [82, 0, 0]>, // L2 latency
+  InstrItinData<IIC_LdStSTWCX,  [InstrStage<1, [A2_XU]>],
+                                 [82, 0, 0]>, // L2 latency
+  InstrItinData<IIC_LdStSync,   [InstrStage<1, [A2_XU]>],
+                                 [6]>,
+  InstrItinData<IIC_SprISYNC,   [InstrStage<1, [A2_XU]>],
+                                 [16]>,
+  InstrItinData<IIC_SprMTMSR,   [InstrStage<1, [A2_XU]>],
+                                 [16, 0]>,
+  InstrItinData<IIC_SprMFCR,    [InstrStage<1, [A2_XU]>],
+                                 [6, 0]>,
+  InstrItinData<IIC_SprMFCRF,   [InstrStage<1, [A2_XU]>],
+                                 [1, 0]>,
+  InstrItinData<IIC_SprMFMSR,   [InstrStage<1, [A2_XU]>],
+                                 [4, 0]>,
+  InstrItinData<IIC_SprMFSPR,   [InstrStage<1, [A2_XU]>],
+                                 [6, 0]>,
+  InstrItinData<IIC_SprMFTB,    [InstrStage<1, [A2_XU]>],
+                                 [4, 0]>,
+  InstrItinData<IIC_SprMTSPR,   [InstrStage<1, [A2_XU]>],
+                                 [6, 0]>,
+  InstrItinData<IIC_SprRFI,     [InstrStage<1, [A2_XU]>],
+                                 [16]>,
+  InstrItinData<IIC_SprSC,      [InstrStage<1, [A2_XU]>],
+                                 [16]>,
+  InstrItinData<IIC_FPGeneral,  [InstrStage<1, [A2_FU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_FPAddSub,   [InstrStage<1, [A2_FU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_FPCompare,  [InstrStage<1, [A2_FU]>],
+                                 [5, 0, 0]>,
+  InstrItinData<IIC_FPDivD,     [InstrStage<1, [A2_FU]>],
+                                 [72, 0, 0]>,
+  InstrItinData<IIC_FPDivS,     [InstrStage<1, [A2_FU]>],
+                                 [59, 0, 0]>,
+  InstrItinData<IIC_FPSqrtD,    [InstrStage<1, [A2_FU]>],
+                                 [69, 0, 0]>,
+  InstrItinData<IIC_FPSqrtS,    [InstrStage<1, [A2_FU]>],
+                                 [65, 0, 0]>,
+  InstrItinData<IIC_FPFused,    [InstrStage<1, [A2_FU]>],
+                                 [6, 0, 0, 0]>,
+  InstrItinData<IIC_FPRes,      [InstrStage<1, [A2_FU]>],
+                                 [6, 0]>
 ]>;
 
 // ===---------------------------------------------------------------------===//
 // A2 machine model for scheduling and other instruction cost heuristics.
 
 def PPCA2Model : SchedMachineModel {
-  let IssueWidth = 1;  // 2 micro-ops are dispatched per cycle.
+  let IssueWidth = 1;  // 1 instruction is dispatched per cycle.
   let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
   let LoadLatency = 6; // Optimistic load latency assuming bypass.
                        // This is overridden by OperandCycles if the

diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index c189b9ed9a6c..dab89e3db353 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -19,238 +19,285 @@
 // * Decode & Dispatch
 // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
 // queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-def DIS0 : FuncUnit; // Dispatch stage - insn 1
-def DIS1 : FuncUnit; // Dispatch stage - insn 2
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
 
 // * Execute
 // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
 // Some instructions can only execute in SFX0 but not SFX1.
 // The CFX has a bypass path, allowing non-divide instructions to execute
 // while a divide instruction is executed.
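The CFX bypass path described above maps directly onto the itinerary encoding: a divide reserves the dedicated bypass unit while the shared CFX stage frees up after one cycle, so only another divide stalls behind it. As an editorial sketch, this is the IIC_IntDivW entry from further down this hunk with explanatory comments added here (the comments are not part of the commit):

  InstrItinData<IIC_IntDivW,
      [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,  // either dispatch slot; the trailing
                                                  // 0 starts the next stage same cycle
       InstrStage<1, [E500_CFX_0], 0>,            // shared CFX stage: busy 1 cycle only
       InstrStage<14, [E500_CFX_DivBypass]>],     // the divide path stays reserved, so
                                                  // non-divide CFX ops keep issuing
      [17, 1, 1],                                 // operand cycles: result defined at
                                                  // cycle 17, sources read at cycle 1
      [E500_GPR_Bypass, E500_GPR_Bypass, E500_GPR_Bypass]>
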
-def SFX0 : FuncUnit; // Simple unit 0 -def SFX1 : FuncUnit; // Simple unit 1 -def BU : FuncUnit; // Branch unit -def CFX_DivBypass - : FuncUnit; // CFX divide bypass path -def CFX_0 : FuncUnit; // CFX pipeline -def LSU_0 : FuncUnit; // LSU pipeline -def FPU_0 : FuncUnit; // FPU pipeline +def E500_SFX0 : FuncUnit; // Simple unit 0 +def E500_SFX1 : FuncUnit; // Simple unit 1 +def E500_BU : FuncUnit; // Branch unit +def E500_CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def E500_CFX_0 : FuncUnit; // CFX pipeline +def E500_LSU_0 : FuncUnit; // LSU pipeline +def E500_FPU_0 : FuncUnit; // FPU pipeline -def CR_Bypass : Bypass; +def E500_GPR_Bypass : Bypass; +def E500_FPR_Bypass : Bypass; +def E500_CR_Bypass : Bypass; def PPCE500mcItineraries : ProcessorItineraries< - [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5, 1, 1], // Latency = 1 or 2 - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<14, [CFX_DivBypass]>], - [17, 1, 1], // Latency=4..35, Repeat= 4..35 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<8, [FPU_0]>], - [11], // Latency = 8 - [FPR_Bypass]>, - InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<8, [FPU_0]>], - [11, 1, 1], // Latency = 8 - [NoBypass, NoBypass, NoBypass]>, - InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0]>], - [7, 1, 1], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [SFX0]>], - [5, 1], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [4, 1], // Latency = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [4, 1, 1], // Latency = 1 - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [4, 1], // Latency = 1 - [CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1, 1], // Latency = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3, Repeat rate = 1 
- [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 1], // Latency = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<3, [LSU_0]>], - [6, 1, 1], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>]>, - InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [SFX0]>], - [7, 1], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [SFX0, SFX1]>], - [5, 1], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0]>], - [5, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<5, [SFX0]>], - [8, 1], - [GPR_Bypass, CR_Bypass]>, - InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 
0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [GPR_Bypass, CR_Bypass]>, - InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0]>], - [4, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [FPU_0]>], - [13, 1, 1], // Latency = 10, Repeat rate = 4 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<68, [FPU_0]>], - [71, 1, 1], // Latency = 68, Repeat rate = 68 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<38, [FPU_0]>], - [41, 1, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [FPU_0]>], - [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<38, [FPU_0]>], - [41, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass]> + [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass, + E500_CFX_0, E500_LSU_0, E500_FPU_0], + [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [E500_CR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_CFX_0], 0>, + InstrStage<14, [E500_CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<8, [E500_FPU_0]>], + [11], // Latency = 8 + [E500_FPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<8, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + 
InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<2, [E500_SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, + E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 
3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<3, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>]>, + InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<2, [E500_SFX0, E500_SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0]>], + [5, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0], 0>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, 
+ InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0]>], + [4, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_CR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<68, [E500_FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<38, [E500_FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass, + E500_FPR_Bypass]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<38, [E500_FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [E500_FPR_Bypass, E500_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td index 7a24d20323da..de097d9d8cf5 100644 --- a/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -20,280 +20,344 @@ // * Decode & Dispatch // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue // queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). -// def DIS0 : FuncUnit; -// def DIS1 : FuncUnit; +def E5500_DIS0 : FuncUnit; +def E5500_DIS1 : FuncUnit; // * Execute // 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. // The CFX has a bypass path, allowing non-divide instructions to execute // while a divide instruction is being executed. 
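Another recurring pattern in these itineraries: update-form memory operations are cracked into two micro-ops, one in the LSU for the access and one in a simple unit for the base-register update, with a trailing micro-op count. An annotated copy of the IIC_LdStLoadUpd entry that appears verbatim below (the comments beyond the original latency note are editorial):

  InstrItinData<IIC_LdStLoadUpd,
      [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,  // one of two dispatch slots
       InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,  // base-register update in either
                                                    // simple unit, same cycle
       InstrStage<1, [E5500_LSU_0]>],               // the load itself in the LSU
      [7, 2],                                       // Latency = 3, Repeat rate = 1
      [E5500_GPR_Bypass, E5500_GPR_Bypass],
      2>,                                           // issued as 2 micro-ops
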
-// def SFX0 : FuncUnit; // Simple unit 0 -// def SFX1 : FuncUnit; // Simple unit 1 -// def BU : FuncUnit; // Branch unit -// def CFX_DivBypass -// : FuncUnit; // CFX divide bypass path -// def CFX_0 : FuncUnit; // CFX pipeline stage 0 +def E5500_SFX0 : FuncUnit; // Simple unit 0 +def E5500_SFX1 : FuncUnit; // Simple unit 1 +def E5500_BU : FuncUnit; // Branch unit +def E5500_CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0 -def CFX_1 : FuncUnit; // CFX pipeline stage 1 +def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1 -// def LSU_0 : FuncUnit; // LSU pipeline -// def FPU_0 : FuncUnit; // FPU pipeline +def E5500_LSU_0 : FuncUnit; // LSU pipeline +def E5500_FPU_0 : FuncUnit; // FPU pipeline -// def CR_Bypass : Bypass; +def E5500_GPR_Bypass : Bypass; +def E5500_FPR_Bypass : Bypass; +def E5500_CR_Bypass : Bypass; def PPCE5500Itineraries : ProcessorItineraries< - [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, - LSU_0, FPU_0], - [CR_Bypass, GPR_Bypass, FPR_Bypass], [ - InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 1 or 2 - [CR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<26, [CFX_DivBypass]>], - [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<16, [CFX_DivBypass]>], - [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [FPU_0]>], - [11], // Latency = 7, Repeat rate = 1 - [FPR_Bypass]>, - InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<7, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 7 - [NoBypass, NoBypass, NoBypass]>, - InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<2, [CFX_1]>], - [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<1, [CFX_1]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<1, [CFX_1]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0], 0>, - InstrStage<2, [CFX_1]>], - [8, 2, 2], // Latency = 4 or 5, Repeat = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // 
Latency = 1, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [SFX0]>], - [6, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [CR_Bypass, CR_Bypass]>, - InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<3, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - 
[8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [LSU_0]>], - [8, 2], // Latency = r+3, Repeat rate = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<3, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0]>]>, - InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [CFX_0]>], - [6, 2], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<5, [CFX_0]>], - [9, 2], // Latency = 5, Repeat rate = 5 - [GPR_Bypass, CR_Bypass]>, - InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [SFX0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [CFX_0]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<4, [CFX_0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [SFX0, SFX1]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<31, [FPU_0]>], - [39, 2, 2], // Latency = 35, Repeat rate = 31 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<16, [FPU_0]>], - [24, 2, 2], // Latency = 20, Repeat rate = 16 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<1, [FPU_0]>], - [11, 2, 2, 2], // Latency = 
7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, - InstrStage<2, [FPU_0]>], - [12, 2], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass]> + [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU, + E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1, + E5500_LSU_0, E5500_FPU_0], + [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [E5500_CR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<26, [E5500_CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<16, [E5500_CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<7, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + 
[E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, + E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<3, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, 
[E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<3, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // 
Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0], 0>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_CR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<31, [E5500_FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<16, [E5500_FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass, + E5500_FPR_Bypass]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [E5500_FPR_Bypass, E5500_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 72a0a392631a..21efd8f8f6c9 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ 
b/lib/Target/PowerPC/PPCScheduleG3.td @@ -11,61 +11,70 @@ // //===----------------------------------------------------------------------===// +def G3_BPU : FuncUnit; // Branch unit +def G3_SLU : FuncUnit; // Store/load unit +def G3_SRU : FuncUnit; // special register unit +def G3_IU1 : FuncUnit; // integer unit 1 (simple) +def G3_IU2 : FuncUnit; // integer unit 2 (complex) +def G3_FPU1 : FuncUnit; // floating point unit 1 def G3Itineraries : ProcessorItineraries< - [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ - InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, - InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>, - InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>, - InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>, - InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>, - InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>, - InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<BrB , [InstrStage<1, [BPU]>]>, - InstrItinData<BrCR , [InstrStage<1, [SRU]>]>, - InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>, - InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, - InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, - InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, - InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, - InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>, - InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>, - InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>, - InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>, - InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>, - InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, - InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, - InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, - InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, - InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>, - InstrItinData<FPRes , [InstrStage<10, [FPU1]>]> + [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [G3_IU1, G3_IU2]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [G3_IU1, G3_IU2]>]>, + 
InstrItinData<IIC_IntCompare , [InstrStage<1, [G3_IU1, G3_IU2]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<19, [G3_IU1]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G3_FPU1]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<5, [G3_IU1]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G3_IU1]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<3, [G3_IU1]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [G3_IU1, G3_IU2]>]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [G3_IU1, G3_IU2]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<2, [G3_IU1, G3_IU2]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G3_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_LdStDCBA , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index fc9120dfa290..340773ef7876 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,71 +11,86 @@ // 
//===----------------------------------------------------------------------===// +def G4_BPU : FuncUnit; // Branch unit +def G4_SLU : FuncUnit; // Store/load unit +def G4_SRU : FuncUnit; // special register unit +def G4_IU1 : FuncUnit; // integer unit 1 (simple) +def G4_IU2 : FuncUnit; // integer unit 2 (complex) +def G4_FPU1 : FuncUnit; // floating point unit 1 +def G4_VPU : FuncUnit; // vector permutation unit +def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4_VFPU : FuncUnit; // vector floating point unit + def G4Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ - InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, - InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>, - InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>, - InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>, - InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>, - InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>, - InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>, - InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<BrB , [InstrStage<1, [BPU]>]>, - InstrItinData<BrCR , [InstrStage<1, [SRU]>]>, - InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>, - InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, - InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, - InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, - InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>, - InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>, - InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>, - InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>, - InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>, - InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>, - InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>, - InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>, - InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, - InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, - InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, - 
InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, - InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>, - InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>, - InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>, - InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>, - InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>, - InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>, - InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>, - InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>, - InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>, - InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]> + [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1, + G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<3, 
[G4_SRU]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>, + InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>, + InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>, + InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index a4e82ce23e6f..1d9f13fcb850 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,78 +11,102 @@ // //===----------------------------------------------------------------------===// -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) +def G4P_BPU : FuncUnit; // Branch unit +def G4P_SLU : FuncUnit; // Store/load unit +def G4P_SRU : FuncUnit; // special register unit +def G4P_IU1 : FuncUnit; // integer unit 1 (simple) +def G4P_IU2 : FuncUnit; // integer unit 2 (complex) +def G4P_IU3 : FuncUnit; // integer unit 3 (simple) +def G4P_IU4 : FuncUnit; // integer unit 4 (simple) +def G4P_FPU1 : FuncUnit; // floating point unit 1 +def G4P_VPU : FuncUnit; // vector permutation unit +def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4P_VFPU : FuncUnit; // vector floating point unit def G4PlusItineraries : ProcessorItineraries< - [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, - InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>, - InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>, - InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>, - InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>, - InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>, - InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>, - InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<BrB , [InstrStage<1, [BPU]>]>, - InstrItinData<BrCR , [InstrStage<2, [IU2]>]>, - InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>, - InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, - InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLoadUpd , [InstrStage<3, 
[SLU]>]>, - InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, - InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, - InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, - InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, - InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, - InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>, - InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>, - InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>, - InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>, - InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>, - InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>, - InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>, - InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>, - InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>, - InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>, - InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, - InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, - InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, - InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, - InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, - InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, - InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>, - InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>, - InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>, - InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>, - InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>, - InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>, - InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>, - InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>, - InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>, - InstrItinData<VecVSR , [InstrStage<2, [VPU]>]> + [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1, + G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + 
InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>, + InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>, + InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>, + 
InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>, + InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>, + InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>, + InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]> ]>; diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index c64998d52a0c..a3b73ab4454f 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,90 +11,110 @@ // //===----------------------------------------------------------------------===// +def G5_BPU : FuncUnit; // Branch unit +def G5_SLU : FuncUnit; // Store/load unit +def G5_SRU : FuncUnit; // special register unit +def G5_IU1 : FuncUnit; // integer unit 1 (simple) +def G5_IU2 : FuncUnit; // integer unit 2 (complex) +def G5_FPU1 : FuncUnit; // floating point unit 1 +def G5_FPU2 : FuncUnit; // floating point unit 2 +def G5_VPU : FuncUnit; // vector permutation unit +def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G5_VFPU : FuncUnit; // vector floating point unit + def G5Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>, - InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, - InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>, - InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>, - InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>, - InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>, - InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>, - InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>, - InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>, - InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, - InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, - InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, - InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, - InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>, - InstrItinData<BrB , [InstrStage<1, [BPU]>]>, - InstrItinData<BrCR , [InstrStage<4, [BPU]>]>, - InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, - InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, - InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, - InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, - InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, - InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, - InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, - InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, - InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>, - 
InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work - InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, - InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, - InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, - InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, - InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work - InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>, - InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>, - InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>, - InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>, - InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>, - InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>, - InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>, - InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>, - InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, - InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, - InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, - InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, - InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, - InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, - InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, - InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>, - InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>, - InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>, - InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>, - InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>, - InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>, - InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>, - InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>, - InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>, - InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>, - InstrItinData<VecVSR , [InstrStage<3, [VPU]>]> + [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2, + G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>, + InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>, + 
InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>, + InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<11, [G5_SLU]>]>, + InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work + InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<35, [G5_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<40, [G5_SLU]>]>, // needs work + InstrItinData<IIC_SprMFSR , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<2, [G5_IU2]>]>, + InstrItinData<IIC_SprMFCRF , [InstrStage<2, [G5_IU2]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G5_IU2]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G5_IU2]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<10, [G5_IU2]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<8, [G5_IU2]>]>, + InstrItinData<IIC_SprSC , [InstrStage<1, [G5_IU2]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPFused , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPRes , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPSqrtD , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_FPSqrtS , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_VecGeneral , [InstrStage<2, [G5_VIU1]>]>, + InstrItinData<IIC_VecFP , [InstrStage<8, [G5_VFPU]>]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>, + InstrItinData<IIC_VecComplex , [InstrStage<5, [G5_VIU2]>]>, + InstrItinData<IIC_VecPerm , [InstrStage<3, 
[G5_VPU]>]>,
+  InstrItinData<IIC_VecFPRound , [InstrStage<8, [G5_VFPU]>]>,
+  InstrItinData<IIC_VecVSL     , [InstrStage<2, [G5_VIU1]>]>,
+  InstrItinData<IIC_VecVSR     , [InstrStage<3, [G5_VPU]>]>
]>;

// ===---------------------------------------------------------------------===//
-// e5500 machine model for scheduling and other instruction cost heuristics.
+// G5 machine model for scheduling and other instruction cost heuristics.

def G5Model : SchedMachineModel {
  let IssueWidth = 4;  // 4 (non-branch) instructions are dispatched per cycle.
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
new file mode 100644
index 000000000000..d3e426975ec0
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -0,0 +1,385 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P7 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P7_DU1 : FuncUnit;
+def P7_DU2 : FuncUnit;
+def P7_DU3 : FuncUnit;
+def P7_DU4 : FuncUnit;
+def P7_DU5 : FuncUnit;
+def P7_DU6 : FuncUnit;
+
+def P7_LS1 : FuncUnit; // Load/Store pipeline 1
+def P7_LS2 : FuncUnit; // Load/Store pipeline 2
+
+def P7_FX1 : FuncUnit; // FX pipeline 1
+def P7_FX2 : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P7_VS1 : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P7_VS2 : FuncUnit; // VS pipeline 2
+
+def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P7_BRU : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts: AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
+def P7Itineraries : ProcessorItineraries<
+  [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
+   P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
+  InstrItinData<IIC_IntSimple   , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2,
+                                                  P7_LS1, P7_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntCompare  , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1, 1]>,
+  // FIXME: Add record-form itinerary data.
+  InstrItinData<IIC_IntDivW     , [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_DU2], 0>,
+                                   InstrStage<36, [P7_FX1, P7_FX2]>],
+                                  [36, 1, 1]>,
+  InstrItinData<IIC_IntDivD     , [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_DU2], 0>,
+                                   InstrStage<68, [P7_FX1, P7_FX2]>],
+                                  [68, 1, 1]>,
+  InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntRotate   , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntShift    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1]>,
+  InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [1, 1]>,
+  InstrItinData<IIC_BrB         , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+                                   InstrStage<1, [P7_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrCR        , [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_CRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrMCR       , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+                                   InstrStage<1, [P7_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+                                   InstrStage<1, [P7_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P7_DU1, P7_DU2,
+                                                  P7_DU3, P7_DU4], 0>,
+                                   InstrStage<1, [P7_LS1, P7_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_DU2], 0>,
+                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [2, 2, 1, 1]>,
+  InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
+                                   InstrStage<1, [P7_DU2], 0>,
+                                   InstrStage<1, [P7_DU3], 0>,
+                                   InstrStage<1, [P7_DU4], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>,
+                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
+                                   InstrStage<1, [P7_FX1, P7_FX2]>],
+                                  [3, 3, 1,
1]>, + InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, 
P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_CRU]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1]>, // mtcr + InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_CRU]>], + [6, 1]>, + InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_CRU]>], + [3, 1]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_FX1]>], + [4, 1]>, // mtctr + InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [8, 1, 1]>, + InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [33, 1, 1]>, + InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [27, 1, 1]>, + InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [44, 1, 1]>, + InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [32, 1, 1]>, + InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1, 1]>, + InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [7, 1, 1]>, + InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>, + InstrStage<1, [P7_VS2]>], + [3, 1, 1]> +]>; + +// ===---------------------------------------------------------------------===// +// P7 machine model for scheduling and other instruction cost heuristics. 
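(Editorial aside, not part of the patch: a reading aid for the new P7 itinerary data before the machine model definition below. The entry is IIC_LdStLoad, copied verbatim from the list above; the annotation assumes the standard InstrStage<cycles, units, timeinc> semantics and ties the numbers back to the notes in the file header.)

// IIC_LdStLoad, annotated (annotation only, not in the patch):
//  - The first stage takes one of the four non-branch dispatch slots
//    (P7_DU1-P7_DU4); its time increment of 0 means the next stage starts
//    in the same cycle, so dispatch adds no latency of its own.
//  - The second stage occupies either load/store pipeline for one cycle.
//  - The operand-cycle list [2, 1, 1] marks the loaded result (the first
//    operand) ready in cycle 2, matching the two-cycle FX load-to-use
//    latency from the notes, with both address operands read in cycle 1.
InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                P7_DU3, P7_DU4], 0>,
                                 InstrStage<1, [P7_LS1, P7_LS2]>],
                                [2, 1, 1]>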
+
+def P7Model : SchedMachineModel {
+  let IssueWidth = 6;  // 4 (non-branch) instructions are dispatched per cycle.
+                       // Note that the dispatch bundle size is 6 (including
+                       // branches), but the total internal issue bandwidth per
+                       // cycle (from all queues) is 8.
+
+  let MinLatency = 0; // Out-of-order dispatch.
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  let Itineraries = P7Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index d4258b4a0eb1..dc1674214769 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "powerpc-selectiondag-info"
#include "PPCTargetMachine.h"
using namespace llvm;
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
-  : TargetSelectionDAGInfo(TM) {
-}
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+    : TargetSelectionDAGInfo(DL) {}
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
-}
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 341b69cdfb5f..b2e7f3b5f2ac 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class PPCTargetMachine;
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
-  explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+  explicit PPCSelectionDAGInfo(const DataLayout *DL);
  ~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 7231ab101a26..b51512d335fc 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -17,28 +17,72 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-using namespace llvm;
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+  const Triple &T = ST.getTargetTriple();
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
-                           const std::string &FS, bool is64Bit)
-  : PPCGenSubtargetInfo(TT, CPU, FS)
-  , IsPPC64(is64Bit)
-  , TargetTriple(TT) {
+  std::string Ret;
+
+  // Most PPC* platforms are big endian, PPC64LE is little endian.
+  if (ST.isLittleEndian())
+    Ret = "e";
+  else
+    Ret = "E";
+
+  Ret += DataLayout::getManglingComponent(T);
+
+  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+  // pointers.
+  if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+    Ret += "-p:32:32";
+
+  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+  // documentation are wrong; these are correct (i.e. "what gcc does").
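  // Editorial note (not part of the patch): for concrete reference, the
  // branches below produce "E-m:e-i64:64-n32:64" for a big-endian 64-bit
  // ELF target and "E-m:o-p:32:32-f64:32:64-n32" for 32-bit Darwin.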
+ if (ST.isPPC64() || ST.isSVR4ABI()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. + if (ST.isPPC64()) + Ret += "-n32:64"; + else + Ret += "-n32"; + + return Ret; +} + +PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + return *this; } +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, PPCTargetMachine &TM, + bool is64Bit, CodeGenOpt::Level OptLevel) + : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), + OptLevel(OptLevel), + FrameLowering(initializeSubtargetDependencies(CPU, FS)), + DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this), + TLInfo(TM), TSInfo(&DL) {} + /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. void PPCSubtarget::SetJITMode() { @@ -73,8 +117,10 @@ void PPCSubtarget::initializeEnvironment() { HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; + UseCRBits = false; HasAltivec = false; HasQPX = false; + HasVSX = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; @@ -124,6 +170,14 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit"; } + // At -O2 and above, track CR bits as individual registers. + if (OptLevel >= CodeGenOpt::Default) { + if (!FullFS.empty()) + FullFS = "+crbits," + FullFS; + else + FullFS = "+crbits"; + } + // Parse features string. ParseSubtargetFeatures(CPUName, FullFS); @@ -144,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine endianness. IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); + + // FIXME: For now, we disable VSX in little-endian mode until endian + // issues in those instructions can be addressed. + if (IsLittleEndian) + HasVSX = false; } /// hasLazyResolverStub - Return true if accesses to the specified global have @@ -163,23 +222,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, GV->hasCommonLinkage() || isDecl; } -bool PPCSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_ALL; - - CriticalPathRCs.clear(); - - if (isPPC64()) - CriticalPathRCs.push_back(&PPC::G8RCRegClass); - else - CriticalPathRCs.push_back(&PPC::GPRCRegClass); - - return OptLevel >= CodeGenOpt::Default; -} - -// Embedded cores need aggressive scheduling. +// Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { default: return false; @@ -187,6 +230,8 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: + case PPC::DIR_PWR7: + case PPC::DIR_PWR8: return true; } } @@ -198,6 +243,19 @@ bool PPCSubtarget::enableMachineScheduler() const { return needsAggressiveScheduling(DarwinDirective); } +// This overrides the PostRAScheduler bit in the SchedModel for each CPU. +bool PPCSubtarget::enablePostMachineScheduler() const { return true; } + +PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const { + return TargetSubtargetInfo::ANTIDEP_ALL; +} + +void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(isPPC64() ? 
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass); +} + void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index ec8c82ad521c..a3cedafb5ef2 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,7 +14,13 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCISelLowering.h" +#include "PPCJITInfo.h" +#include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -50,6 +56,7 @@ namespace PPC { DIR_PWR6, DIR_PWR6X, DIR_PWR7, + DIR_PWR8, DIR_64 }; } @@ -73,6 +80,7 @@ protected: bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; + bool UseCRBits; bool IsPPC64; bool HasAltivec; bool HasQPX; @@ -98,12 +106,23 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + /// OptLevel - What default optimization level we're emitting code for. + CodeGenOpt::Level OptLevel; + + PPCFrameLowering FrameLowering; + const DataLayout DL; + PPCInstrInfo InstrInfo; + PPCJITInfo JITInfo; + PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; + public: /// This constructor initializes the data members to match that /// of the specified triple. /// PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); + const std::string &FS, PPCTargetMachine &TM, bool is64Bit, + CodeGenOpt::Level OptLevel); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -122,12 +141,23 @@ public: /// unsigned getDarwinDirective() const { return DarwinDirective; } - /// getInstrItins - Return the instruction itineraies based on subtarget + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; } + const DataLayout *getDataLayout() const { return &DL; } + const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } + PPCJITInfo *getJITInfo() { return &JITInfo; } + const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } + const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + + /// initializeSubtargetDependencies - Initializes using a CPU and feature string + /// so that we can use initializer lists for subtarget initialization. + PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + /// \brief Reset the features for the PowerPC target. - virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(const MachineFunction *MF) override; private: void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); @@ -146,6 +176,10 @@ public: /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } + /// useCRBits - Return true if we should store and manipulate i1 values in + /// the individual condition register bits. + bool useCRBits() const { return UseCRBits; } + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. 
This means that an extra load /// is required to get the address of the global. @@ -172,6 +206,7 @@ public: bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } + bool hasVSX() const { return HasVSX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } @@ -184,26 +219,32 @@ public: /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } - /// isBGP - True if this is a BG/P platform. - bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } /// isBGQ - True if this is a BG/Q platform. bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } + /// FIXME: Should use a command-line option. + bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() && + isLittleEndian(); } - /// enablePostRAScheduler - True at 'More' optimization. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; + bool enableEarlyIfConversion() const override { return hasISEL(); } // Scheduling customization. - bool enableMachineScheduler() const; + bool enableMachineScheduler() const override; + // This overrides the PostRAScheduler bit in the SchedModel for each CPU. + bool enablePostMachineScheduler() const override; + AntiDepBreakMode getAntiDepBreakMode() const override; + void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; + void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const; - bool useAA() const; + unsigned NumRegionInstrs) const override; + bool useAA() const override; }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index d6767d51f2cc..9563b9045c39 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -26,6 +26,10 @@ static cl:: opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); +static cl::opt<bool> +VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", + cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); + extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); @@ -33,59 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); } -/// Return the datalayout string of a subtarget. -static std::string getDataLayoutString(const PPCSubtarget &ST) { - const Triple &T = ST.getTargetTriple(); - - // PPC is big endian - std::string Ret = "E"; - - // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers. - if (ST.isPPC64()) - Ret += "-p:64:64"; - else - Ret += "-p:32:32"; - - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - if (ST.isPPC64() || ST.isSVR4ABI()) - Ret += "-f64:64:64-i64:64:64"; - else - Ret += "-f64:32:64"; - - // Set support for 128 floats depending on the ABI. 
- if (!ST.isPPC64() && ST.isSVR4ABI()) - Ret += "-f128:64:128"; - - // Some ABIs support 128 bit vectors. - if (ST.isPPC64() && ST.isSVR4ABI()) - Ret += "-v128:128:128"; - - // PPC64 has 32 and 64 bit register, PPC32 has only 32 bit ones. - if (ST.isPPC64()) - Ret += "-n32:64"; - else - Ret += "-n32"; - - return Ret; -} - -PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, +PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64Bit), - DL(getDataLayoutString(Subtarget)), InstrInfo(*this), - FrameLowering(Subtarget), JITInfo(*this, is64Bit), - TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { - - // The binutils for the BG/P are too old for CFI. - if (Subtarget.isBGP()) - setMCUseCFI(false); + CodeGenOpt::Level OL, bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this, is64Bit, OL) { initAsmInfo(); } @@ -129,11 +86,12 @@ public: return *getPPCTargetMachine().getSubtargetImpl(); } - virtual bool addPreISel(); - virtual bool addILPOpts(); - virtual bool addInstSelector(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); + bool addPreISel() override; + bool addILPOpts() override; + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPreSched2() override; + bool addPreEmitPass() override; }; } // namespace @@ -149,12 +107,8 @@ bool PPCPassConfig::addPreISel() { } bool PPCPassConfig::addILPOpts() { - if (getPPCSubtarget().hasISEL()) { - addPass(&EarlyIfConverterID); - return true; - } - - return false; + addPass(&EarlyIfConverterID); + return true; } bool PPCPassConfig::addInstSelector() { @@ -166,10 +120,20 @@ bool PPCPassConfig::addInstSelector() { addPass(createPPCCTRLoopsVerify()); #endif + addPass(createPPCVSXCopyPass()); + return false; +} + +bool PPCPassConfig::addPreRegAlloc() { + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + insertPass(VSXFMAMutateEarly ? 
&RegisterCoalescerID : &MachineSchedulerID, + &PPCVSXFMAMutateID); return false; } bool PPCPassConfig::addPreSched2() { + addPass(createPPCVSXCopyCleanupPass()); + if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 606ccb314126..4c7029ca7a36 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -14,11 +14,7 @@ #ifndef PPC_TARGETMACHINE_H #define PPC_TARGETMACHINE_H -#include "PPCFrameLowering.h" -#include "PPCISelLowering.h" #include "PPCInstrInfo.h" -#include "PPCJITInfo.h" -#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -29,13 +25,6 @@ namespace llvm { /// class PPCTargetMachine : public LLVMTargetMachine { PPCSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - PPCInstrInfo InstrInfo; - PPCFrameLowering FrameLowering; - PPCJITInfo JITInfo; - PPCTargetLowering TLInfo; - PPCSelectionDAGInfo TSInfo; - InstrItineraryData InstrItins; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -43,34 +32,38 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); - virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const PPCFrameLowering *getFrameLowering() const { - return &FrameLowering; + const PPCInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); } - virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual const PPCTargetLowering *getTargetLowering() const { - return &TLInfo; + const PPCFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); } - virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; + PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + const PPCTargetLowering *getTargetLowering() const override { + return getSubtargetImpl()->getTargetLowering(); } - virtual const PPCRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); + const PPCSelectionDAGInfo* getSelectionDAGInfo() const override { + return getSubtargetImpl()->getSelectionDAGInfo(); + } + const PPCRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); } - virtual const DataLayout *getDataLayout() const { return &DL; } - virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData *getInstrItineraryData() const { - return &InstrItins; + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } + const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const InstrItineraryData *getInstrItineraryData() const override { + return &getSubtargetImpl()->getInstrItineraryData(); } // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) override; /// \brief Register PPC analysis passes with a pass manager. - virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; }; /// PPC32TargetMachine - PowerPC 32-bit target machine. 
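The net effect of the PPCSubtarget/PPCTargetMachine changes above is a delegation pattern: the subtarget now owns the InstrInfo, FrameLowering, DataLayout, and lowering objects, while the target machine keeps only thin getters that forward through getSubtargetImpl(). A minimal standalone sketch of that shape follows; the Stub* names are stand-ins for illustration, not the actual LLVM classes, and the real constructors and initialization order are more involved.

// Minimal sketch of the ownership move, with stand-in types.
#include <iostream>

struct StubFrameLowering { int StackAlign = 16; };
struct StubInstrInfo    { const char *Name = "stub-instrinfo"; };

class StubSubtarget {
  // The subtarget owns the per-CPU objects; members are declared in
  // dependency order so constructor initializer lists can be used safely.
  StubFrameLowering FrameLowering;
  StubInstrInfo InstrInfo;
public:
  const StubFrameLowering *getFrameLowering() const { return &FrameLowering; }
  const StubInstrInfo *getInstrInfo() const { return &InstrInfo; }
};

class StubTargetMachine {
  StubSubtarget Subtarget;
public:
  const StubSubtarget *getSubtargetImpl() const { return &Subtarget; }
  // The old TargetMachine accessors survive as thin forwarders.
  const StubFrameLowering *getFrameLowering() const {
    return getSubtargetImpl()->getFrameLowering();
  }
  const StubInstrInfo *getInstrInfo() const {
    return getSubtargetImpl()->getInstrInfo();
  }
};

int main() {
  StubTargetMachine TM;
  std::cout << TM.getInstrInfo()->Name << ", stack align "
            << TM.getFrameLowering()->StackAlign << "\n";
  return 0;
}

One practical payoff, visible in initializeSubtargetDependencies above, is that members depending on the parsed CPU/feature string can be constructed directly in the subtarget's initializer list.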
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp index ec1e606eee56..2903cc192aa8 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp +++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "PPCTargetObjectFile.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/Mangler.h" using namespace llvm; @@ -22,16 +22,9 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { InitializeELF(TM.Options.UseInitArray); } -const MCSection * PPC64LinuxTargetObjectFile:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - const MCSection *DefaultSection = - TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); - - if (DefaultSection != ReadOnlySection) - return DefaultSection; - +const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI // when we have a constant that contains global relocations. This is // necessary because of this ABI's handling of pointers to functions in @@ -46,14 +39,17 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // linker, so we must use DataRelROSection instead of ReadOnlySection. // For more information, see the description of ELIMINATE_COPY_RELOCS in // GNU ld. - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (Kind.isReadOnly()) { + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (GVar && GVar->isConstant() && - (GVar->getInitializer()->getRelocationInfo() == - Constant::GlobalRelocations)) - return DataRelROSection; + if (GVar && GVar->isConstant() && + (GVar->getInitializer()->getRelocationInfo() == + Constant::GlobalRelocations)) + Kind = SectionKind::getReadOnlyWithRel(); + } - return DefaultSection; + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, + Mang, TM); } const MCExpr *PPC64LinuxTargetObjectFile:: diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h index 262c52213d29..3e71bbc67379 100644 --- a/lib/Target/PowerPC/PPCTargetObjectFile.h +++ b/lib/Target/PowerPC/PPCTargetObjectFile.h @@ -20,14 +20,14 @@ namespace llvm { /// 64-bit PowerPC Linux. class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF { - virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; - virtual const MCSection * - SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const; + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override; /// \brief Describe a TLS variable address within debug info. 
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; + const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index e876be16a9b3..73fb69101353 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -15,8 +15,12 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: + PPCTargetStreamer(MCStreamer &S); virtual ~PPCTargetStreamer(); virtual void emitTCEntry(const MCSymbol &S) = 0; + virtual void emitMachine(StringRef CPU) = 0; + virtual void emitAbiVersion(int AbiVersion) = 0; + virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0; }; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 8879630270e2..007901b23e0c 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -14,17 +14,22 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppctti" #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "ppctti" + +static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting", +cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializePPCTTIPass(PassRegistry &); @@ -32,35 +37,26 @@ void initializePPCTTIPass(PassRegistry &); namespace { -class PPCTTI : public ImmutablePass, public TargetTransformInfo { - const PPCTargetMachine *TM; +class PPCTTI final : public ImmutablePass, public TargetTransformInfo { const PPCSubtarget *ST; const PPCTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - public: - PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } PPCTTI(const PPCTargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + : ImmutablePass(ID), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { initializePPCTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + virtual void initializePass() override { pushTTIStack(this); } - virtual void finalizePass() { - popTTIStack(); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -68,7 +64,7 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. 
- virtual void *getAdjustedAnalysisPointer(const void *ID) { + virtual void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; @@ -76,31 +72,40 @@ public: /// \name Scalar TTI Implementations /// @{ - virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + + virtual PopcntSupportKind + getPopcntSupport(unsigned TyWidth) const override; + virtual void getUnrollingPreferences( + Loop *L, UnrollingPreferences &UP) const override; /// @} /// \name Vector TTI Implementations /// @{ - virtual unsigned getNumberOfRegisters(bool Vector) const; - virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getNumberOfRegisters(bool Vector) const override; + virtual unsigned getRegisterBitWidth(bool Vector) const override; + virtual unsigned getMaximumUnrollFactor() const override; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const; + OperandValueKind) const override; virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; + int Index, Type *SubTp) const override; virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; + Type *Src) const override; virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; + Type *CondTy) const override; virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; + unsigned Index) const override; virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace) const override; /// @} }; @@ -130,6 +135,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + if (Imm == 0) + return TCC_Free; + + if (Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Basic; + + if (isInt<32>(Imm.getSExtValue())) { + // A constant that can be materialized using lis. 
+ if ((Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Basic; + + return 2 * TCC_Basic; + } + } + + return 4 * TCC_Basic; +} + +unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + switch (IID) { + default: return TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) + return TCC_Free; + break; + } + return PPCTTI::getIntImmCost(Imm, Ty); +} + +unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + unsigned ImmIdx = ~0U; + bool ShiftedFree = false, RunFree = false, UnsignedFree = false, + ZeroFree = false; + switch (Opcode) { + default: return TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TCC_Basic; + return TCC_Free; + case Instruction::And: + RunFree = true; // (for the rotate-and-mask instructions) + // Fallthrough... + case Instruction::Add: + case Instruction::Or: + case Instruction::Xor: + ShiftedFree = true; + // Fallthrough... + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + ImmIdx = 1; + break; + case Instruction::ICmp: + UnsignedFree = true; + ImmIdx = 1; + // Fallthrough... (zero comparisons can use record-form instructions) + case Instruction::Select: + ZeroFree = true; + break; + case Instruction::PHI: + case Instruction::Call: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Store: + break; + } + + if (ZeroFree && Imm == 0) + return TCC_Free; + + if (Idx == ImmIdx && Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Free; + + if (RunFree) { + if (Imm.getBitWidth() <= 32 && + (isShiftedMask_32(Imm.getZExtValue()) || + isShiftedMask_32(~Imm.getZExtValue()))) + return TCC_Free; + + + if (ST->isPPC64() && + (isShiftedMask_64(Imm.getZExtValue()) || + isShiftedMask_64(~Imm.getZExtValue()))) + return TCC_Free; + } + + if (UnsignedFree && isUInt<16>(Imm.getZExtValue())) + return TCC_Free; + + if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Free; + } + + return PPCTTI::getIntImmCost(Imm, Ty); +} + void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { if (ST->getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling @@ -141,7 +282,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasAltivec()) return 0; - return 32; + return ST->hasVSX() ? 
64 : 32; } unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { @@ -210,11 +351,21 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { + // Double-precision scalars are already located in index #0. + if (Index == 0) + return 0; + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); + } + // Estimated cost of a load-hit-store delay. This was obtained // experimentally as a minimum needed to prevent unprofitable // vectorization for the paq8p benchmark. It may need to be // raised further if other unprofitable cases remain. - unsigned LHSPenalty = 12; + unsigned LHSPenalty = 2; + if (ISD == ISD::INSERT_VECTOR_ELT) + LHSPenalty += 7; // Vector element insert/extract with Altivec is very expensive, // because they require store and reload with the attendant @@ -235,14 +386,34 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); - // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; + unsigned Cost = + TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + + // VSX loads/stores support unaligned access. + if (ST->hasVSX()) { + if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64) + return Cost; + } + + bool UnalignedAltivec = + Src->isVectorTy() && + Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() && + LT.second.getSizeInBits() == 128 && + Opcode == Instruction::Load; // PPC in general does not support unaligned loads and stores. They'll need // to be decomposed based on the alignment factor. unsigned SrcBytes = LT.second.getStoreSize(); - if (SrcBytes && Alignment && Alignment < SrcBytes) - Cost *= (SrcBytes/Alignment); + if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) { + Cost += LT.first*(SrcBytes/Alignment-1); + + // For a vector type, there is also scalarization overhead (only for + // stores, loads are expanded using the vector-load + permutation sequence, + // which is much less expensive). + if (Src->isVectorTy() && Opcode == Instruction::Store) + for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i) + Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i); + } return Cost; } diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt index fdb8a62b9d24..c9548c7fe0cd 100644 --- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMPowerPCInfo PowerPCTargetInfo.cpp ) - -add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index f77d85b15ab9..410234686400 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCInfo parent = PowerPC -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = PowerPC |
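As a closing illustration of the constant-hoisting hooks added in PPCTargetTransformInfo.cpp above, the base cost ladder of PPCTTI::getIntImmCost can be re-derived standalone. This is a simplified sketch: the helper names are local stand-ins, TCC_Free and TCC_Basic are assumed to be 0 and 1, and the bit-width guards of the real code are omitted.

// Standalone re-derivation of the immediate materialization costs used by
// PPCTTI::getIntImmCost; assumes TCC_Free == 0 and TCC_Basic == 1.
#include <cstdint>
#include <cstdio>

static bool fitsInt16(int64_t V) { return V >= -32768 && V <= 32767; }
static bool fitsInt32(int64_t V) { return V >= INT32_MIN && V <= INT32_MAX; }

static unsigned ppcImmCost(int64_t Imm) {
  if (Imm == 0)
    return 0;                         // TCC_Free: no instruction needed
  if (fitsInt16(Imm))
    return 1;                         // one li (load immediate)
  if (fitsInt32(Imm)) {
    if ((static_cast<uint64_t>(Imm) & 0xFFFF) == 0)
      return 1;                       // one lis (low halfword is zero)
    return 2;                         // lis + ori pair
  }
  return 4;                           // general 64-bit materialization
}

int main() {
  const int64_t Tests[] = {0, 42, 0x50000, 0x12345, INT64_C(0x123456789)};
  for (int64_t V : Tests)
    std::printf("imm 0x%llx -> cost %u\n", (unsigned long long)V,
                ppcImmCost(V));
  return 0;
}

In the full implementation these values are scaled by TCC_Basic and gated on the operand index, so that, for example, a 16-bit immediate in the right operand slot of an add is reported as TCC_Free because it folds into addi and hoisting it would only hurt.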