author    Dimitry Andric <dim@FreeBSD.org>  2014-11-24 09:08:18 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2014-11-24 09:08:18 +0000
commit    5ca98fd98791947eba83a1ed3f2c8191ef7afa6c
tree      f5944309621cee4fe0976be6f9ac619b7ebfc4c2
parent    68bcb7db193e4bc81430063148253d30a791023e
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/AsmParser/CMakeLists.txt  5
-rw-r--r--  lib/Target/PowerPC/AsmParser/LLVMBuild.txt  4
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp  471
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt  4
-rw-r--r--  lib/Target/PowerPC/Disassembler/CMakeLists.txt  3
-rw-r--r--  lib/Target/PowerPC/Disassembler/LLVMBuild.txt  23
-rw-r--r--  lib/Target/PowerPC/Disassembler/Makefile  16
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp  348
-rw-r--r--  lib/Target/PowerPC/InstPrinter/CMakeLists.txt  4
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp  21
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h  8
-rw-r--r--  lib/Target/PowerPC/LLVMBuild.txt  6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt  2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp  95
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp  126
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp  37
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h  9
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp  175
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp  42
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h  12
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  121
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h  1
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp  8
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp  8
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h  7
-rw-r--r--  lib/Target/PowerPC/Makefile  5
-rw-r--r--  lib/Target/PowerPC/PPC.h  16
-rw-r--r--  lib/Target/PowerPC/PPC.td  59
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp  475
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp  18
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp  55
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td  39
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp  13
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp  272
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp  340
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h  233
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp  218
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.h  43
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp  1031
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  2699
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h  190
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td  442
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td  297
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td  219
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp  976
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h  178
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td  1302
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td  816
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp  17
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h  45
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp  66
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.cpp  8
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h  15
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp  213
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h  49
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td  86
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td  981
-rw-r--r--  lib/Target/PowerPC/PPCSchedule440.td  1118
-rw-r--r--  lib/Target/PowerPC/PPCScheduleA2.td  246
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE500mc.td  493
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE5500.td  592
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG3.td  119
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG4.td  147
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG4Plus.td  168
-rw-r--r--  lib/Target/PowerPC/PPCScheduleG5.td  180
-rw-r--r--  lib/Target/PowerPC/PPCScheduleP7.td  385
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.cpp  11
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.h  2
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp  106
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h  65
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp  90
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h  51
-rw-r--r--  lib/Target/PowerPC/PPCTargetObjectFile.cpp  30
-rw-r--r--  lib/Target/PowerPC/PPCTargetObjectFile.h  10
-rw-r--r--  lib/Target/PowerPC/PPCTargetStreamer.h  4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp  241
-rw-r--r--  lib/Target/PowerPC/TargetInfo/CMakeLists.txt  4
-rw-r--r--  lib/Target/PowerPC/TargetInfo/LLVMBuild.txt  2
78 files changed, 11970 insertions, 5066 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
index 3aa59c00c369..408858e424d5 100644
--- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt
+++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
@@ -1,8 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCAsmParser
PPCAsmParser.cpp
)
-
-add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
index 02ebf1d3d3ed..801f27bb7bc3 100644
--- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
+++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -19,5 +19,5 @@
type = Library
name = PowerPCAsmParser
parent = PowerPC
-required_libraries = PowerPCDesc PowerPCInfo MC MCParser Support
+required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index fe83fe1438ce..d7066d58709a 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -9,21 +9,23 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCMCExpr.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -94,6 +96,44 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned VSRegs[64] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+static unsigned VSFRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -177,6 +217,7 @@ class PPCAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
const MCInstrInfo &MII;
bool IsPPC64;
+ bool IsDarwin;
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -185,30 +226,34 @@ class PPCAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
+ bool isDarwin() const { return IsDarwin; }
bool MatchRegisterName(const AsmToken &Tok,
unsigned &RegNo, int64_t &IntVal);
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
const MCExpr *FixupVariantKind(const MCExpr *E);
bool ParseExpression(const MCExpr *&EVal);
+ bool ParseDarwinExpression(const MCExpr *&EVal);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
bool ParseDirectiveMachine(SMLoc L);
+ bool ParseDarwinDirectiveMachine(SMLoc L);
+ bool ParseDirectiveAbiVersion(SMLoc L);
+ bool ParseDirectiveLocalEntry(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm);
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) override;
- void ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
/// @name Auto-generated Match Functions
/// {
@@ -221,27 +266,29 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII)
+ const MCInstrInfo &_MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
TheTriple.getArch() == Triple::ppc64le);
+ IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- virtual bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
- virtual bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
- virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind,
- MCContext &Ctx);
+ const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx) override;
};
/// PPCOperand - Instances of this class represent a parsed PowerPC machine
@@ -306,10 +353,10 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
+ SMLoc getEndLoc() const override { return EndLoc; }
/// isPPC64 - True if this operand is for an instruction in 64-bit mode.
bool isPPC64() const { return IsPPC64; }
@@ -334,11 +381,16 @@ public:
return TLSReg.Sym;
}
- unsigned getReg() const {
+ unsigned getReg() const override {
assert(isRegNumber() && "Invalid access!");
return (unsigned) Imm.Val;
}
+ unsigned getVSReg() const {
+ assert(isVSRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -354,8 +406,9 @@ public:
return 7 - countTrailingZeros<uint64_t>(Imm.Val);
}
- bool isToken() const { return Kind == Token; }
- bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isToken() const override { return Kind == Token; }
+ bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
@@ -376,6 +429,7 @@ public:
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
bool isCCRegNumber() const { return (Kind == Expression
&& isUInt<3>(getExprCRVal())) ||
(Kind == Immediate
@@ -386,8 +440,8 @@ public:
&& isUInt<5>(getImm())); }
bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
isPowerOf2_32(getImm()); }
- bool isMem() const { return false; }
- bool isReg() const { return false; }
+ bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
void addRegOperands(MCInst &Inst, unsigned N) const {
llvm_unreachable("addRegOperands");
@@ -442,6 +496,16 @@ public:
Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
}
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ }
+
+ void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
@@ -483,10 +547,11 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- virtual void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const override;
- static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Token);
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -495,22 +560,27 @@ public:
return Op;
}
- static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S,
- bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
// Allocate extra memory for the string and copy it.
+ // FIXME: This is incorrect, Operands are owned by unique_ptr with a default
+ // deleter which will destroy them by simply using "delete", not correctly
+ // calling operator delete on this extra memory after calling the dtor
+ // explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
- PPCOperand *Op = new (Mem) PPCOperand(Token);
- Op->Tok.Data = (const char *)(Op + 1);
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = (const char *)(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((char *)(Op + 1), Str.data(), Str.size());
+ std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
return Op;
}
- static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Immediate);
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -518,9 +588,9 @@ public:
return Op;
}
- static PPCOperand *CreateExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Expression);
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -529,9 +599,9 @@ public:
return Op;
}
- static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(TLSRegister);
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -539,8 +609,8 @@ public:
return Op;
}
- static PPCOperand *CreateFromMCExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -571,10 +641,8 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
-
-void PPCAsmParser::
-ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
case PPC::LAx: {
@@ -854,11 +922,10 @@ ProcessInstruction(MCInst &Inst,
}
}
-bool PPCAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
@@ -867,7 +934,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
+ Out.EmitInstruction(Inst, STI);
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -879,7 +946,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -960,7 +1027,7 @@ ExtractModifierFromExpr(const MCExpr *E,
switch (E->getKind()) {
case MCExpr::Target:
case MCExpr::Constant:
- return 0;
+ return nullptr;
case MCExpr::SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
@@ -988,7 +1055,7 @@ ExtractModifierFromExpr(const MCExpr *E,
Variant = PPCMCExpr::VK_PPC_HIGHESTA;
break;
default:
- return 0;
+ return nullptr;
}
return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context);
@@ -998,7 +1065,7 @@ ExtractModifierFromExpr(const MCExpr *E,
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
if (!Sub)
- return 0;
+ return nullptr;
return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
}
@@ -1009,7 +1076,7 @@ ExtractModifierFromExpr(const MCExpr *E,
const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
if (!LHS && !RHS)
- return 0;
+ return nullptr;
if (!LHS) LHS = BE->getLHS();
if (!RHS) RHS = BE->getRHS();
@@ -1021,7 +1088,7 @@ ExtractModifierFromExpr(const MCExpr *E,
else if (LHSVariant == RHSVariant)
Variant = LHSVariant;
else
- return 0;
+ return nullptr;
return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
}
@@ -1081,10 +1148,16 @@ FixupVariantKind(const MCExpr *E) {
llvm_unreachable("Invalid expression kind!");
}
-/// Parse an expression. This differs from the default "parseExpression"
-/// in that it handles complex \code @l/@ha \endcode modifiers.
+/// ParseExpression. This differs from the default "parseExpression" in that
+/// it handles modifiers.
bool PPCAsmParser::
ParseExpression(const MCExpr *&EVal) {
+
+ if (isDarwin())
+ return ParseDarwinExpression(EVal);
+
+ // (ELF Platforms)
+ // Handle \code @l/@ha \endcode
if (getParser().parseExpression(EVal))
return true;
@@ -1098,12 +1171,59 @@ ParseExpression(const MCExpr *&EVal) {
return false;
}
+/// ParseDarwinExpression. (MachO Platforms)
+/// This differs from the default "parseExpression" in that it handles detection
+/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
+/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
+/// syntax form so it is done here. TODO: Determine if there is merit in arranging
+/// for this to be done at a higher level.
bool PPCAsmParser::
-ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ParseDarwinExpression(const MCExpr *&EVal) {
+ PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
+ switch (getLexer().getKind()) {
+ default:
+ break;
+ case AsmToken::Identifier:
+ // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
+ // something starting with any other char should be part of the
+ // asm syntax. If handwritten asm includes an identifier like lo16,
+ // then all bets are off - but no-one would do that, right?
+ StringRef poss = Parser.getTok().getString();
+ if (poss.equals_lower("lo16")) {
+ Variant = PPCMCExpr::VK_PPC_LO;
+ } else if (poss.equals_lower("hi16")) {
+ Variant = PPCMCExpr::VK_PPC_HI;
+ } else if (poss.equals_lower("ha16")) {
+ Variant = PPCMCExpr::VK_PPC_HA;
+ }
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ Parser.Lex(); // Eat the xx16
+ if (getLexer().isNot(AsmToken::LParen))
+ return Error(Parser.getTok().getLoc(), "expected '('");
+ Parser.Lex(); // Eat the '('
+ }
+ break;
+ }
+
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+ Parser.Lex(); // Eat the ')'
+ EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext());
+ }
+ return false;
+}
+
+/// ParseOperand
+/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
+/// rNN for MachO.
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
- PPCOperand *Op;
// Attempt to parse the next token as an immediate
switch (getLexer().getKind()) {
@@ -1115,20 +1235,35 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
return Error(S, "invalid register name");
+ case AsmToken::Identifier:
+ // Note that non-register-name identifiers from the compiler will begin
+ // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
+ // identifiers like r31foo - so we fall through in the event that parsing
+ // a register name fails.
+ if (isDarwin()) {
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
+ }
+ }
+ // Fall-through to process non-register-name identifiers as expression.
// All other expressions
case AsmToken::LParen:
case AsmToken::Plus:
case AsmToken::Minus:
case AsmToken::Integer:
- case AsmToken::Identifier:
case AsmToken::Dot:
case AsmToken::Dollar:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
if (!ParseExpression(EVal))
break;
/* fall through */
@@ -1137,8 +1272,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Push the parsed operand into the list of operands
- Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
// Check whether this is a TLS call expression
bool TLSCall = false;
@@ -1157,8 +1291,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
}
// Otherwise, check for D-form memory operands
@@ -1177,11 +1310,25 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
break;
case AsmToken::Integer:
- if (getParser().parseAbsoluteExpression(IntVal) ||
+ if (!isDarwin()) {
+ if (getParser().parseAbsoluteExpression(IntVal) ||
IntVal < 0 || IntVal > 31)
return Error(S, "invalid register number");
+ } else {
+ return Error(S, "unexpected integer value");
+ }
break;
+ case AsmToken::Identifier:
+ if (isDarwin()) {
+ unsigned RegNo;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ break;
+ }
+ }
+ // Fall-through..
+
default:
return Error(S, "invalid memory operand");
}
@@ -1191,17 +1338,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
}
return false;
}
/// Parse an instruction mnemonic followed by its operands.
-bool PPCAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
// The first operand is the token for the instruction name.
// If the next character is a '+' or '-', we need to add it to the
// instruction name, to match what TableGen is doing.
@@ -1261,14 +1406,23 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
/// ParseDirective parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal == ".word")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- if (IDVal == ".llong")
- return ParseDirectiveWord(8, DirectiveID.getLoc());
- if (IDVal == ".tc")
- return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
- if (IDVal == ".machine")
- return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (!isDarwin()) {
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ if (IDVal == ".llong")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ if (IDVal == ".tc")
+ return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (IDVal == ".abiversion")
+ return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ if (IDVal == ".localentry")
+ return ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ } else {
+ if (IDVal == ".machine")
+ return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
+ }
return true;
}
@@ -1279,7 +1433,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
for (;;) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
- return true;
+ return false;
getParser().getStreamer().EmitValue(Value, Size);
@@ -1303,8 +1457,10 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
Parser.Lex();
// Align to word size.
@@ -1314,12 +1470,14 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
return ParseDirectiveWord(Size, L);
}
-/// ParseDirectiveMachine
+/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
if (getLexer().isNot(AsmToken::Identifier) &&
- getLexer().isNot(AsmToken::String))
- return Error(L, "unexpected token in directive");
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
StringRef CPU = Parser.getTok().getIdentifier();
Parser.Lex();
@@ -1329,15 +1487,118 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
// Implement ".machine any" (by doing nothing) for the benefit
// of existing assembler code. Likewise, we can then implement
// ".machine push" and ".machine pop" as no-op.
- if (CPU != "any" && CPU != "push" && CPU != "pop")
- return Error(L, "unrecognized machine type");
+ if (CPU != "any" && CPU != "push" && CPU != "pop") {
+ Error(L, "unrecognized machine type");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitMachine(CPU);
+
+ return false;
+}
+
+/// ParseDarwinDirectiveMachine (Mach-o platforms)
+/// ::= .machine cpu-identifier
+bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: this is only the 'default' set of cpu variants.
+ // However we don't act on this information at present, this is simply
+ // allowing parsing to proceed with minimal sanity checking.
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") {
+ Error(L, "unrecognized cpu type");
+ return false;
+ }
+
+ if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) {
+ Error(L, "wrong cpu type specified for 64bit");
+ return false;
+ }
+ if (!isPPC64() && CPU == "ppc64") {
+ Error(L, "wrong cpu type specified for 32bit");
+ return false;
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(L, "unexpected token in directive");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
return false;
}
+/// ParseDirectiveAbiVersion
+/// ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+ int64_t AbiVersion;
+ if (getParser().parseAbsoluteExpression(AbiVersion)){
+ Error(L, "expected constant expression");
+ return false;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitAbiVersion(AbiVersion);
+
+ return false;
+}
+
+/// ParseDirectiveLocalEntry
+/// ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name)) {
+ Error(L, "expected identifier in directive");
+ return false;
+ }
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(L, "expected expression");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitLocalEntry(Sym, Expr);
+
+ return false;
+}
+
+
+
/// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
@@ -1351,7 +1612,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
@@ -1365,8 +1626,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
default: return Match_InvalidOperand;
}
- PPCOperand *Op = static_cast<PPCOperand*>(AsmOp);
- if (Op->isImm() && Op->getImm() == ImmVal)
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
@@ -1392,6 +1653,6 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E,
case MCSymbolRefExpr::VK_PPC_HIGHESTA:
return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
default:
- return 0;
+ return nullptr;
}
}
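
Note on the PPCAsmParser.cpp hunks above: the CreateTokenWithStringCopy change pairs placement new with unique_ptr ownership, which is exactly what the in-diff FIXME warns about. A minimal standalone sketch of that allocation pattern (hypothetical Token type and helper name, not the LLVM classes) makes the mismatch concrete:

#include <cstddef>
#include <cstring>
#include <memory>
#include <new>

struct Token {
  const char *Data;
  std::size_t Length;
};

// Allocate the object plus Len trailing bytes in one block, then point Data
// at the trailing storage -- the same trick CreateTokenWithStringCopy uses.
static std::unique_ptr<Token> createTokenCopy(const char *Str, std::size_t Len) {
  void *Mem = ::operator new(sizeof(Token) + Len);
  std::unique_ptr<Token> Tok(new (Mem) Token());
  Tok->Data = reinterpret_cast<const char *>(Tok.get() + 1);
  Tok->Length = Len;
  std::memcpy(const_cast<char *>(Tok->Data), Str, Len);
  // The default deleter runs plain `delete`, which does not know the block
  // was over-allocated -- the mismatch the FIXME in the patch calls out.
  return Tok;
}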
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 9a763f53a2d1..ea4de63a2448 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
@@ -34,9 +35,8 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
-add_dependencies(LLVMPowerPCCodeGen PowerPCCommonTableGen intrinsics_gen)
-
add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..ca457df88d3e
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMPowerPCDisassembler
+ PPCDisassembler.cpp
+ )
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..b0978c227ae9
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCDisassembler
+parent = PowerPC
+required_libraries = MC PowerPCInfo Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/Makefile b/lib/Target/PowerPC/Disassembler/Makefile
new file mode 100644
index 000000000000..86e3b4752207
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDisassembler
+
+# Hack: we need to include 'main' PPC target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
new file mode 100644
index 000000000000..a2305a9efc71
--- /dev/null
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -0,0 +1,348 @@
+//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class PPCDisassembler : public MCDisassembler {
+public:
+ PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ virtual ~PPCDisassembler() {}
+
+ // Override MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createPPCDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializePowerPCDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(ThePPC32Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
+ createPPCDisassembler);
+}
+
+// FIXME: These can be generated by TableGen from the existing register
+// encoding values!
+
+static const unsigned CRRegs[] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+static const unsigned CRBITRegs[] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+
+static const unsigned FRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+
+static const unsigned VRegs[] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+static const unsigned VSRegs[] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
+static const unsigned VSFRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
+static const unsigned GPRegs[] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned GP0Regs[] = {
+ PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned G8Regs[] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
+template <std::size_t N>
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned (&Regs)[N]) {
+ assert(RegNo < N && "Invalid register number");
+ Inst.addOperand(MCOperand::CreateReg(Regs[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
+static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRBITRegs);
+}
+
+static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VRegs);
+}
+
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRegs);
+}
+
+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GPRegs);
+}
+
+static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GP0Regs);
+}
+
+static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G8Regs);
+}
+
+#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
+#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memri field (imm, reg), which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 16;
+ uint64_t Disp = Imm & 0xFFFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ switch (Inst.getOpcode()) {
+ default: break;
+ case PPC::LBZU:
+ case PPC::LHAU:
+ case PPC::LHZU:
+ case PPC::LWZU:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ case PPC::STBU:
+ case PPC::STHU:
+ case PPC::STWU:
+ case PPC::STFSU:
+ case PPC::STFDU:
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memrix field (imm, reg), which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 14;
+ uint64_t Disp = Imm & 0x3FFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ if (Inst.getOpcode() == PPC::LDU)
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ else if (Inst.getOpcode() == PPC::STDU)
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // The cr bit encoding is 0x80 >> cr_reg_num.
+
+ unsigned Zeros = countTrailingZeros(Imm);
+ assert(Zeros < 8 && "Invalid CR bit value");
+
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros]));
+ return MCDisassembler::Success;
+}
+
+#include "PPCGenDisassemblerTables.inc"
+
+DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ // Get the four bytes of the instruction.
+ uint8_t Bytes[4];
+ Size = 4;
+ if (Region.readBytes(Address, Size, Bytes) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // The instruction is big-endian encoded.
+ uint32_t Inst = (Bytes[0] << 24) |
+ (Bytes[1] << 16) |
+ (Bytes[2] << 8) |
+ (Bytes[3] << 0);
+
+ return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
+}
+
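
The memri and crbitm decoders in the new PPCDisassembler.cpp above unpack operands from packed immediate fields: memri carries a 5-bit base register above a 16-bit signed displacement, and a CR field mask is encoded as 0x80 >> cr_reg_num. A standalone sketch of both bit layouts (sample values assumed; plain C++ rather than the LLVM MC API):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::countTrailingZeros, to keep the sketch self-contained.
static unsigned countTrailingZeros(uint64_t V) {
  unsigned N = 0;
  while (V && !(V & 1)) { V >>= 1; ++N; }
  return N;
}

int main() {
  // memri: base = r3, displacement = -8.
  uint64_t Imm = (3u << 16) | 0xFFF8;
  uint64_t Base = Imm >> 16;
  int64_t Disp = (int16_t)(Imm & 0xFFFF);  // sign-extend the low 16 bits
  assert(Base < 32 && "Invalid base register");
  printf("%lld(r%llu)\n", (long long)Disp, (unsigned long long)Base);  // -8(r3)

  // crbitm: 0x80 >> cr_reg_num, so 0x02 has one trailing zero -> CR6.
  uint64_t Mask = 0x02;
  unsigned Zeros = countTrailingZeros(Mask);
  assert(Zeros < 8 && "Invalid CR bit value");
  printf("CR%u\n", 7 - Zeros);  // CR6
  return 0;
}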
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index a605cc4b5f27..ab30a110f40e 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-
-add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 8281b5ca03c8..7279b091b34c 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
@@ -23,6 +22,8 @@
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
// FIXME: Once the integrated assembler supports full register names, tie this
// to the verbose-asm setting.
static cl::opt<bool>
@@ -149,6 +150,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
case PPC::PRED_NU:
O << "nu";
return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Invalid predicate code");
}
@@ -184,6 +188,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
case PPC::PRED_NU_PLUS:
O << "+";
return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Invalid predicate code");
}
@@ -193,6 +200,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -310,7 +324,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
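
The stripRegisterPrefix change above makes VSX names drop their two-character "vs" prefix, matching the existing "cr" handling. A condensed standalone rendering (simplified from the function in the hunk) with a few expected mappings:

#include <cassert>
#include <cstring>

static const char *strip(const char *Name) {
  switch (Name[0]) {
  case 'r': case 'f': case 'v':
    return (Name[1] == 's') ? Name + 2 : Name + 1;
  case 'c':
    if (Name[1] == 'r') return Name + 2;
  }
  return Name;
}

int main() {
  assert(!strcmp(strip("r31"), "31"));
  assert(!strcmp(strip("vs52"), "52"));  // the new VSX case
  assert(!strcmp(strip("cr6"), "6"));
  return 0;
}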
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8a4c03d64540..211a62813e7a 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -31,8 +31,8 @@ public:
return IsDarwin;
}
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -41,9 +41,9 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier = 0);
-
+ raw_ostream &O, const char *Modifier = nullptr);
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 7b3e843507a8..9d173d64b944 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -16,18 +16,20 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = PowerPC
parent = Target
+has_asmparser = 1
has_asmprinter = 1
+has_disassembler = 1
has_jit = 1
[component_1]
type = Library
name = PowerPCCodeGen
parent = PowerPC
-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index 3efa5ecf9096..3cea65ee4de6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -8,5 +8,3 @@ add_llvm_library(LLVMPowerPCDesc
PPCMachObjectWriter.cpp
PPCELFObjectWriter.cpp
)
-
-add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 0d420815ea62..c54d5e75bdfd 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -9,7 +9,9 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
@@ -71,14 +73,18 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
namespace {
class PPCAsmBackend : public MCAsmBackend {
-const Target &TheTarget;
+ const Target &TheTarget;
+ bool IsLittleEndian;
public:
- PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
+ PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T),
+ IsLittleEndian(isLittle) {}
- unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+ unsigned getNumFixupKinds() const override {
+ return PPC::NumTargetFixupKinds;
+ }
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
@@ -88,17 +94,27 @@ public:
{ "fixup_ppc_half16ds", 0, 14, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
+ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 2, 24, 0 },
+ { "fixup_ppc_brcond14abs", 2, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
+ uint64_t Value, bool IsPCRel) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -108,11 +124,37 @@ public:
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff);
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
+ Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ }
+ }
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ switch ((PPC::Fixups)Fixup.getKind()) {
+ default: break;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ // If the target symbol has a local entry point, we must not attempt
+ // to resolve the fixup directly. Emit a relocation and leave
+ // resolution of the final target address to the linker.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
+ break;
+ }
}
- bool mayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
// FIXME.
return false;
}
@@ -120,18 +162,18 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+ const MCAsmLayout &Layout) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
@@ -152,6 +194,10 @@ public:
assert(Name == "ppc32" && "Unknown target name!");
return 4;
}
+
+ bool isLittleEndian() const {
+ return IsLittleEndian;
+ }
};
} // end anonymous namespace
@@ -160,9 +206,9 @@ public:
namespace {
class DarwinPPCAsmBackend : public PPCAsmBackend {
public:
- DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -170,26 +216,18 @@ namespace {
(is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
MachO::CPU_SUBTYPE_POWERPC_ALL);
}
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
};
class ELFPPCAsmBackend : public PPCAsmBackend {
uint8_t OSABI;
public:
- ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
- PPCAsmBackend(T), OSABI(OSABI) { }
+ ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
+ PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
bool is64 = getPointerSize() == 8;
- return createPPCELFObjectWriter(OS, is64, OSABI);
- }
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
+ return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
};
@@ -202,5 +240,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
return new DarwinPPCAsmBackend(T);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFPPCAsmBackend(T, OSABI);
+ bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le;
+ return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI);
}
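
The applyFixup loop above is the crux of the endianness support: the same split-up fixup value is OR'ed into the fragment in either byte order, with only the byte index mirrored. A minimal standalone sketch of that logic (Value and NumBytes mirror the patch; everything else is illustrative scaffolding):

#include <cstdint>
#include <cstdio>

// OR the low NumBytes bytes of Value into Data, honoring target byte order.
static void applyBytes(uint8_t *Data, uint64_t Value, unsigned NumBytes,
                       bool IsLittleEndian) {
  for (unsigned i = 0; i != NumBytes; ++i) {
    // On LE, byte i receives bits [8*i, 8*i+8); on BE the index is mirrored.
    unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
    Data[i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
  }
}

int main() {
  uint8_t BE[4] = {0}, LE[4] = {0};
  applyBytes(BE, 0x48000010, 4, false); // lands as 48 00 00 10
  applyBytes(LE, 0x48000010, 4, true);  // lands as 10 00 00 48
  for (unsigned i = 0; i != 4; ++i)
    printf("%02x/%02x ", BE[i], LE[i]);
  printf("\n");
  return 0;
}
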
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 54de70eff71a..e93e95fc0751 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -9,7 +9,9 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
@@ -27,17 +29,11 @@ namespace {
virtual unsigned getRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const;
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend) const;
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
- virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
};
}
@@ -49,12 +45,39 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
PPCELFObjectWriter::~PPCELFObjectWriter() {
}
+static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
+ const MCFixup &Fixup) {
+ const MCExpr *Expr = Fixup.getValue();
+
+ if (Expr->getKind() != MCExpr::Target)
+ return Target.getAccessVariant();
+
+ switch (cast<PPCMCExpr>(Expr)->getKind()) {
+ case PPCMCExpr::VK_PPC_None:
+ return MCSymbolRefExpr::VK_None;
+ case PPCMCExpr::VK_PPC_LO:
+ return MCSymbolRefExpr::VK_PPC_LO;
+ case PPCMCExpr::VK_PPC_HI:
+ return MCSymbolRefExpr::VK_PPC_HI;
+ case PPCMCExpr::VK_PPC_HA:
+ return MCSymbolRefExpr::VK_PPC_HA;
+ case PPCMCExpr::VK_PPC_HIGHERA:
+ return MCSymbolRefExpr::VK_PPC_HIGHERA;
+ case PPCMCExpr::VK_PPC_HIGHER:
+ return MCSymbolRefExpr::VK_PPC_HIGHER;
+ case PPCMCExpr::VK_PPC_HIGHEST:
+ return MCSymbolRefExpr::VK_PPC_HIGHEST;
+ case PPCMCExpr::VK_PPC_HIGHESTA:
+ return MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ }
+ llvm_unreachable("unknown PPCMCExpr kind");
+}
+
unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const
{
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
unsigned Type;
@@ -64,7 +87,15 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
- Type = ELF::R_PPC_REL24;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_PPC_PLTREL24;
+ break;
+ }
break;
case PPC::fixup_ppc_brcond14:
case PPC::fixup_ppc_brcond14abs:
@@ -356,64 +387,31 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) const {
+ bool IsPCRel) const {
return getRelocTypeInner(Target, Fixup, IsPCRel);
}
-const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- assert(Target.getSymA() && "SymA cannot be 0");
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- bool EmitThisSym;
- switch (Modifier) {
- // GOT references always need a relocation, even if the
- // target symbol is local.
- case MCSymbolRefExpr::VK_GOT:
- case MCSymbolRefExpr::VK_PPC_GOT_LO:
- case MCSymbolRefExpr::VK_PPC_GOT_HI:
- case MCSymbolRefExpr::VK_PPC_GOT_HA:
- EmitThisSym = true;
- break;
- default:
- EmitThisSym = false;
- break;
- }
-
- if (EmitThisSym)
- return &Target.getSymA()->getSymbol().AliasedSymbol();
- return NULL;
-}
-
-const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- assert(Target.getSymA() && "SymA cannot be 0");
- const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
-
- unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel);
-
- // The .odp creation emits a relocation against the symbol ".TOC." which
- // create a R_PPC64_TOC relocation. However the relocation symbol name
- // in final object creation should be NULL, since the symbol does not
- // really exist, it is just the reference to TOC base for the current
- // object file.
- bool EmitThisSym = RelocType != ELF::R_PPC64_TOC;
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
- if (EmitThisSym && !Symbol.isTemporary())
- return &Symbol;
- return NULL;
+ case ELF::R_PPC_REL24:
+ // If the target symbol has a local entry point, we must keep the
+ // target symbol to preserve that information for the linker.
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(SD) << 2;
+ return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+ }
}
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
+ bool IsLittleEndian,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
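
The local-entry test repeated in processFixupValue and needsRelocateWithSymbol hinges on how MCELF packs st_other, per the comment about "the last 6 bits of the second byte". A sketch of just that bit manipulation, assuming the STO_PPC64_LOCAL_MASK value (0xe0) from llvm/Support/ELF.h; the helper name and sample values are illustrative:

#include <cassert>
#include <cstdint>

static const unsigned STO_PPC64_LOCAL_MASK = 0xe0; // top 3 bits of st_other

// MCELF keeps only the upper six bits of st_other, shifted down by two,
// so callers widen it back to a full byte before applying the STO mask.
static bool hasLocalEntry(uint8_t MCOther) {
  unsigned Other = unsigned(MCOther) << 2;
  return (Other & STO_PPC64_LOCAL_MASK) != 0;
}

int main() {
  // st_other == 0x60 marks an ELFv2 local entry point; MCELF stores 0x18.
  assert(hasLocalEntry(0x60 >> 2));
  assert(!hasLocalEntry(0x00));
  return 0;
}
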
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 1d9c06406a4b..b95a2ac13e04 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -18,24 +18,6 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
-/// This version of the constructor is here to maintain ABI compatibility with
-/// LLVM 3.4.0
-PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit) {
- PointerSize = CalleeSaveStackSlotSize = 8;
- }
- IsLittleEndian = false;
-
- CommentString = ";";
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
- if (!is64Bit)
- Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
-
- AssemblerDialect = 1; // New-Style mnemonics.
- SupportsDebugInformation= true; // Debug information.
-}
-
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -46,32 +28,32 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
ExceptionsType = ExceptionHandling::DwarfCFI;
if (!is64Bit)
- Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+ Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
AssemblerDialect = 1; // New-Style mnemonics.
SupportsDebugInformation = true; // Debug information.
- // old assembler lacks some directives
+ // The installed assembler for OSX < 10.6 lacks some directives.
// FIXME: this should really be a check on the assembler characteristics
// rather than OS version
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
HasWeakDefCanBeHiddenDirective = false;
+
+ UseIntegratedAssembler = true;
}
void PPCLinuxMCAsmInfo::anchor() { }
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
- IsLittleEndian = false;
+ IsLittleEndian = T.getArch() == Triple::ppc64le;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
CommentString = "#";
- GlobalPrefix = "";
- PrivateGlobalPrefix = ".L";
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
@@ -89,7 +71,12 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
ExceptionsType = ExceptionHandling::DwarfCFI;
ZeroDirective = "\t.space\t";
- Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
+
+ if (T.getOS() == llvm::Triple::FreeBSD ||
+ (T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
+ (T.getOS() == llvm::Triple::OpenBSD && !is64Bit))
+ UseIntegratedAssembler = true;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 633970ccc289..754330b2c60f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -21,18 +21,15 @@ namespace llvm {
class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
+ void anchor() override;
public:
- /// This version of the constructor is here to maintain ABI compatibility
- /// with LLVM 3.4.0.
- explicit PPCMCAsmInfoDarwin(bool is64Bit);
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
- explicit PPCLinuxMCAsmInfo(bool is64Bit);
+ explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 346a9beada90..435a93f78c1d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,8 @@
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
@@ -33,70 +34,113 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
- const MCSubtargetInfo &STI;
+ const MCInstrInfo &MCII;
const MCContext &CTX;
- Triple TT;
+ bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
+ : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
}
~PPCMCCodeEmitter() {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override {
// For fast-isel, a float COPY_TO_REGCLASS can survive this long.
// It's just a nop to keep the register classes happy, so don't
// generate anything.
unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
return;
- uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
- unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
- if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
- Opcode == PPC::BL8_NOP_TLS)
- Size = 8;
-
- // Output the constant in big endian byte order.
- int ShiftValue = (Size * 8) - 8;
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)(Bits >> ShiftValue);
- Bits <<= 8;
+ // Output the constant in big/little endian byte order.
+ unsigned Size = Desc.getSize();
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
+ }
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
+ }
+ break;
+ default:
+ llvm_unreachable("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
@@ -110,14 +154,17 @@ MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new PPCMCCodeEmitter(MCII, STI, Ctx);
+ Triple TT(STI.getTargetTriple());
+ bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
+ return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
}
unsigned PPCMCCodeEmitter::
getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -126,9 +173,10 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -138,9 +186,10 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -150,9 +199,10 @@ getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
@@ -161,79 +211,87 @@ getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the immediate field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian ? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return 0;
}
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Encode (imm, reg) as a memri, which has the low 16 bits as the
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+ return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian ? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
return RegBits;
}
unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// Encode (imm, reg) as a memrix, which has the low 14 bits as the
// displacement and the next 5 bits as the register #.
assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits;
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ Fixups.push_back(MCFixup::Create(IsLittleEndian ? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
return RegBits;
}
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
- return CTX.getRegisterInfo()->getEncodingValue(PPC::X13);
+ Triple TT(STI.getTargetTriple());
+ bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
}
unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// For special TLS calls, we need two fixups; one for the branch target
// (__tls_get_addr), which we create via getDirectBrEncoding as usual,
// and one for the TLSGD or TLSLD symbol, which is emitted here.
const MCOperand &MO = MI.getOperand(OpNo+1);
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
- return getDirectBrEncoding(MI, OpNo, Fixups);
+ return getDirectBrEncoding(MI, OpNo, Fixups, STI);
}
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
@@ -244,7 +302,8 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
if (MO.isReg()) {
// MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
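
Two endian details above are easy to miss: half16 fixups sit at byte offset 2 of a big-endian instruction word but offset 0 of a little-endian one, and an 8-byte pseudo (BL8_NOP and friends) keeps its first instruction in the high 32 bits of Bits, so each 4-byte word is byte-swapped independently rather than the whole 8 bytes at once. A compilable sketch of that emission order, under those assumptions (names are illustrative):

#include <cstdint>
#include <vector>

// Emit one 4-byte instruction word in the requested byte order.
static void emitWord(std::vector<uint8_t> &OS, uint32_t W, bool LE) {
  for (unsigned i = 0; i != 4; ++i)
    OS.push_back(uint8_t(W >> (LE ? 8 * i : 24 - 8 * i)));
}

// Mirror of EncodeInstruction's switch: 8-byte pseudos are two words,
// first instruction in the high half, each word swapped on its own.
static void encode(std::vector<uint8_t> &OS, uint64_t Bits, unsigned Size,
                   bool LE) {
  if (Size == 8)
    emitWord(OS, uint32_t(Bits >> 32), LE);
  emitWord(OS, uint32_t(Bits), LE);
}

int main() {
  std::vector<uint8_t> Out;
  encode(Out, 0x4800000160000000ULL, 8, true); // branch + nop, illustrative
  return Out.size() == 8 ? 0 : 1;               // 01 00 00 48 00 00 00 60
}
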
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index d7e84021595e..3ac0aca6b78c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,14 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppcmcexpr"
#include "PPCMCExpr.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
+#define DEBUG_TYPE "ppcmcexpr"
+
const PPCMCExpr*
PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
@@ -54,7 +56,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const {
MCValue Value;
- if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
return false;
if (Value.isAbsolute()) {
@@ -86,6 +88,9 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
}
Res = MCValue::get(Result);
} else {
+ if (!Layout)
+ return false;
+
MCContext &Context = Layout->getAssembler().getContext();
const MCSymbolRefExpr *Sym = Value.getSymA();
MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
@@ -123,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return true;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
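
When the subexpression folds to a constant, EvaluateAsRelocatableImpl computes the modifier arithmetic itself. A worked sketch of the two most common cases, @l and @ha: the rounding in @ha compensates for @l being sign-extended when an addis/addi pair reassembles the value. The helper names are illustrative:

#include <cassert>
#include <cstdint>

static int16_t lo(uint32_t V)  { return int16_t(V & 0xffff); }
static uint16_t ha(uint32_t V) { return uint16_t((V + 0x8000) >> 16); }

int main() {
  uint32_t Addr = 0x1234ABCD;
  // (ha << 16) + signext(lo) == Addr, even though lo(Addr) here is
  // negative (-0x5433), because ha rounded up to 0x1235.
  assert((uint32_t(ha(Addr)) << 16) + lo(Addr) == Addr);
  return 0;
}
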
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index e44c7c1adc67..bca408507e72 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -10,9 +10,9 @@
#ifndef PPCMCEXPR_H
#define PPCMCEXPR_H
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmLayout.h"
namespace llvm {
@@ -76,16 +76,16 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const {
+ const MCAsmLayout *Layout) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
// There are no TLS PPCMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 6a5051840181..4c6780ff75a7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -16,16 +16,22 @@
#include "PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "PPCGenInstrInfo.inc"
@@ -35,10 +41,9 @@
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
-using namespace llvm;
-
// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() {}
+PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
@@ -74,12 +79,12 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
- MAI = new PPCLinuxMCAsmInfo(isPPC64);
+ MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0);
+ MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -112,20 +117,90 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
- PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
- virtual void emitTCEntry(const MCSymbol &S) {
+ PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : PPCTargetStreamer(S), OS(OS) {}
+ void emitTCEntry(const MCSymbol &S) override {
OS << "\t.tc ";
OS << S.getName();
OS << "[TC],";
OS << S.getName();
OS << '\n';
}
+ void emitMachine(StringRef CPU) override {
+ OS << "\t.machine " << CPU << '\n';
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ OS << "\t.abiversion " << AbiVersion << '\n';
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+ }
};
class PPCTargetELFStreamer : public PPCTargetStreamer {
- virtual void emitTCEntry(const MCSymbol &S) {
+public:
+ PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+ void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
- Streamer->EmitSymbolValue(&S, 8);
+ Streamer.EmitSymbolValue(&S, 8);
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: Is there anything to do in here or does this directive only
+ // limit the parser?
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_PPC64_ABI;
+ Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+ MCA.setELFHeaderEFlags(Flags);
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
+
+ int64_t Res;
+ if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+ report_fatal_error(".localentry expression must be absolute.");
+
+ unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+ if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+ report_fatal_error(".localentry expression cannot be encoded.");
+
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= Encoded;
+ MCELF::setOther(Data, Other >> 2);
+
+ // For GAS compatibility, unless we already saw a .abiversion directive,
+ // set e_flags to indicate ELFv2 ABI.
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ if ((Flags & ELF::EF_PPC64_ABI) == 0)
+ MCA.setELFHeaderEFlags(Flags | 2);
+ }
+};
+
+class PPCTargetMachOStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ void emitTCEntry(const MCSymbol &S) override {
+ llvm_unreachable("Unknown pseudo-op: .tc");
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: We should update the CPUType, CPUSubType in the Object file if
+ // the new values are different from the defaults.
+ }
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("Unknown pseudo-op: .abiversion");
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
}
@@ -135,25 +210,31 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI,
bool RelaxAll,
bool NoExecStack) {
- if (Triple(TT).isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ if (Triple(TT).isOSDarwin()) {
+ MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ new PPCTargetMachOStreamer(*S);
+ return S;
+ }
- PPCTargetStreamer *S = new PPCTargetELFStreamer();
- return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ MCStreamer *S =
+ createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ new PPCTargetELFStreamer(*S);
+ return S;
}
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc, bool useCFI,
- bool useDwarfDirectory, MCInstPrinter *InstPrint,
- MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
- PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
-
- return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
- useDwarfDirectory, InstPrint, CE, TAB,
- ShowInst);
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
+ new PPCTargetAsmStreamer(*S, OS);
+ return S;
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
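
The round-trip check in emitLocalEntry exists because only a handful of offsets are encodable: the three STO_PPC64_LOCAL bits of st_other select a power-of-two byte offset. A sketch of the decode side, assuming the formula used by llvm/Support/ELF.h's decodePPC64LocalEntryOffset (constants mirror ELF.h; the helper name is illustrative):

#include <cassert>
#include <cstdint>

static const unsigned LocalBit = 5;         // STO_PPC64_LOCAL_BIT
static const unsigned LocalMask = 7u << 5;  // STO_PPC64_LOCAL_MASK

static int64_t decodeLocalEntry(unsigned Other) {
  unsigned Val = (Other & LocalMask) >> LocalBit;
  return ((1 << Val) >> 2) << 2;  // 0, 0, 4, 8, 16, ... (powers of two)
}

int main() {
  assert(decodeLocalEntry(3u << LocalBit) == 8);  // one instruction pair
  // An offset such as 12 never decodes back to itself, so the streamer's
  // ".localentry expression cannot be encoded" path rejects it.
  for (unsigned V = 0; V != 8; ++V)
    assert(decodeLocalEntry(V << LocalBit) != 12);
  return 0;
}
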
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 0b0ca241e26b..474395b93637 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -46,6 +46,7 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
/// createPPCELFObjectWriter - Construct a PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
+ bool IsLittleEndian,
uint8_t OSABI);
/// createPPCMachObjectWriter - Construct a PPC Mach-O object writer.
MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index bbafe2e78955..cff27baeb5ee 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -44,7 +44,7 @@ public:
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
+ uint64_t &FixedValue) override {
if (Writer->is64Bit()) {
report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
} else
@@ -206,7 +206,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
report_fatal_error("symbol '" + A->getName() +
@@ -219,7 +219,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
@@ -324,7 +324,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// this doesn't seem right for RIT_PPC_BR24
// Get the symbol data, if any.
- MCSymbolData *SD = 0;
+ const MCSymbolData *SD = nullptr;
if (Target.getSymA())
SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 63facc5446d3..c2987b641c04 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -42,6 +42,10 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS;
case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS;
case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS;
+
+ // Simple predicates for single condition-register bits.
+ case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET;
+ case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET;
}
llvm_unreachable("Unknown PPC branch opcode!");
}
@@ -72,6 +76,10 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS;
case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS;
case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS;
+
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
}
llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index d498c2f8f446..10e328a8116e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -48,7 +48,12 @@ namespace PPC {
PRED_GT_PLUS = (1 << 5) | 15,
PRED_NE_PLUS = (2 << 5) | 7,
PRED_UN_PLUS = (3 << 5) | 15,
- PRED_NU_PLUS = (3 << 5) | 7
+ PRED_NU_PLUS = (3 << 5) | 7,
+
+ // When dealing with individual condition-register bits, we have simple set
+ // and unset predicates.
+ PRED_BIT_SET = 1024,
+ PRED_BIT_UNSET = 1025
};
/// Invert the specified predicate. != -> ==, < -> >=.
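
For reference, the numeric predicate values above follow a simple packing: the low five bits are the branch BO field (12 branches when the CR bit is set, 4 when it is unset; 15 and 7 are the "+" taken-hint variants) and bits 5-6 select the bit within the CR field (LT, GT, EQ, SO/UN). A sketch decoding a few of them; the sample set is illustrative:

#include <cstdio>

enum { PRED_LT = (0 << 5) | 12, PRED_GE = (0 << 5) | 4,
       PRED_GT = (1 << 5) | 12, PRED_NE_PLUS = (2 << 5) | 7 };

int main() {
  const unsigned Preds[] = { PRED_LT, PRED_GE, PRED_GT, PRED_NE_PLUS };
  for (unsigned P : Preds)
    printf("pred %3u -> BO = %2u, CR bit = %u\n", P, P & 31, P >> 5);
  // PRED_BIT_SET (1024) and PRED_BIT_UNSET (1025) sit outside this
  // packing on purpose: they denote a raw test of a single CR bit, so
  // getSwappedPredicate() must never see them.
  return 0;
}
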
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 21fdcd9350e1..c96674809b01 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
- PPCGenMCCodeEmitter.inc PPCGenFastISel.inc
+ PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
+ PPCGenDisassemblerTables.inc
-DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser Disassembler InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f0d5af24466e..ba5fa4f79b4e 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -23,6 +23,7 @@
namespace llvm {
class PPCTargetMachine;
+ class PassRegistry;
class FunctionPass;
class ImmutablePass;
class JITCodeEmitter;
@@ -35,6 +36,9 @@ namespace llvm {
FunctionPass *createPPCCTRLoopsVerify();
#endif
FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXCopyCleanupPass();
+ FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -45,6 +49,9 @@ namespace llvm {
/// \brief Creates a PPC-specific Target Transformation Info pass.
ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
+ void initializePPCVSXFMAMutatePass(PassRegistry&);
+ extern char &PPCVSXFMAMutateID;
+
namespace PPCII {
/// Target Operand Flag enum.
@@ -53,10 +60,11 @@ namespace llvm {
// PPC Specific MachineOperand flags.
MO_NO_FLAG,
- /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and earlier.
- MO_DARWIN_STUB = 1,
+ /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is
+ /// used for calls and jumps to external functions on Tiger and earlier, and
+ /// for PIC calls on Linux and ELF systems.
+ MO_PLT_OR_STUB = 1,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 54e3d400a9d9..a9842b287cbb 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -46,11 +46,14 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", ""
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
+ "Use condition-register bits individually">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
@@ -88,7 +91,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
- "Enable VSX instructions">;
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -110,6 +114,12 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// their record-form variants.
class RecFormRel;
+// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
+// FMA instruction forms with their corresponding factor-killing forms.
+class AltVSXFMARel {
+ bit IsVSXFMAAlt = 0;
+}
+
//===----------------------------------------------------------------------===//
// Relation Map Definitions.
//===----------------------------------------------------------------------===//
@@ -140,6 +150,19 @@ def getNonRecordFormOpcode : InstrMapping {
let ValueCols = [["0"]];
}
+def getAltVSXFMAOpcode : InstrMapping {
+ let FilterClass = "AltVSXFMARel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName"];
+ // Instructions with the same IsVSXFMAAlt value form a column.
+ let ColFields = ["IsVSXFMAAlt"];
+ // The key column holds the (default) addend-killing instructions.
+ let KeyCol = ["0"];
+ // The value column holds the factor-killing alternates (IsVSXFMAAlt = 1).
+ let ValueCols = [["1"]];
+}
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -153,12 +176,12 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
-def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
- FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
-def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
- FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
+def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -254,7 +277,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", G5Model,
+def : ProcessorModel<"pwr7", P7Model,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
@@ -263,6 +286,15 @@ def : ProcessorModel<"pwr7", G5Model,
FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
+ [DirectivePwr8, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
@@ -283,11 +315,11 @@ include "PPCCallingConv.td"
def PPCInstrInfo : InstrInfo {
let isLittleEndianEncoding = 1;
-}
-def PPCAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- bit isMCAsmWriter = 1;
+ // FIXME: Unset this when no longer needed!
+ let decodePositionallyEncodedOperands = 1;
+
+ let noNamedPositionallyEncodedOperands = 1;
}
def PPCAsmParser : AsmParser {
@@ -306,8 +338,7 @@ def PPCAsmParserVariant : AsmParserVariant {
def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
-
- let AssemblyWriters = [PPCAsmWriter];
+
let AssemblyParsers = [PPCAsmParser];
let AssemblyParserVariants = [PPCAsmParserVariant];
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2d92a112d5ef..6f67c598c754 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -16,27 +16,29 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -54,12 +56,13 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "asmprinter"
+
namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
@@ -71,22 +74,22 @@ namespace {
: AsmPrinter(TM, Streamer),
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC Assembly Printer";
}
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
- virtual void EmitInstruction(const MachineInstr *MI);
+ void EmitInstruction(const MachineInstr *MI) override;
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@@ -95,15 +98,17 @@ namespace {
explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Linux PPC Assembly Printer";
}
- bool doFinalization(Module &M);
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
- virtual void EmitFunctionEntryLabel();
+ void EmitFunctionEntryLabel() override;
- void EmitFunctionBodyEnd();
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -113,12 +118,12 @@ namespace {
explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Darwin PPC Assembly Printer";
}
- bool doFinalization(Module &M);
- void EmitStartOfAsmFile(Module &M);
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
};
@@ -130,7 +135,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
@@ -139,6 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
+ const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -157,37 +166,13 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- // FIXME: PIC relocation model
- return;
case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
O << *GetBlockAddressSymbol(MO.getBlockAddress());
return;
- case MachineOperand::MO_ExternalSymbol: {
- // Computing the address of an external symbol, not calling it.
- if (TM.getRelocationModel() == Reloc::Static) {
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- return;
- }
-
- MCSymbol *NLPSym =
- OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
- MO.getSymbolName()+"$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
-
- O << *NLPSym;
- return;
- }
case MachineOperand::MO_GlobalAddress: {
// Computing the address of a global symbol, not calling it.
const GlobalValue *GV = MO.getGlobal();
@@ -197,21 +182,21 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (TM.getRelocationModel() != Reloc::Static &&
(GV->isDeclaration() || GV->isWeakForLinker())) {
if (!GV->hasHiddenVisibility()) {
- SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>()
.getGVStubEntry(SymToPrint);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
- SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().
getHiddenGVStubEntry(SymToPrint);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
@@ -228,7 +213,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
default:
- O << "<unknown operand type: " << MO.getType() << ">";
+ O << "<unknown operand type: " << (unsigned)MO.getType() << ">";
return;
}
}
@@ -305,13 +290,13 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
-
+ const DataLayout *DL = TM.getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid name clash check if the name already exists.
- while (TOCEntry == 0) {
- if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == 0) {
+ while (!TOCEntry) {
+ if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == nullptr) {
TOCEntry = GetTempSymbol("C", TOCLabelID);
}
}
@@ -325,6 +310,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
+ bool isPPC64 = Subtarget.isPPC64();
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
@@ -340,7 +326,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
@@ -349,6 +335,66 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
+ case PPC::GetGBRO: {
+ // Get the offset from the GOT Base Register to the GOT
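+    // The lowered form is (sketch; rD/rPICBase stand for the pseudo's
+    // register operands):
+    //   lwz rD, (<PIC offset label> - <PIC base label>)(rPICBase)
+    // which loads the word holding the distance from the PIC base to the
+    // GOT; that word is emitted alongside the function entry label (see
+    // EmitFunctionEntryLabel below).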
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ MCSymbol *PICOffset = MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ TmpInst.setOpcode(PPC::LWZ);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCOperand MO = TmpInst.getOperand(1);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp,
+ PB,
+ OutContext));
+ TmpInst.addOperand(MO);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::UpdateGBR: {
+    // Update the GOT Base Register to point to the GOT. It may be possible
+    // to merge this with PPC::GetGBRO, doing it all in one step.
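+    // The lowered form is (sketch): add rD, rS, rD; the destination is
+    // appended as the second source operand, adding the GOT offset loaded
+    // by GetGBRO to the PIC base already in rD.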
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ TmpInst.setOpcode(PPC::ADD4);
+ TmpInst.addOperand(TmpInst.getOperand(0));
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtoc: {
+    // Transform %R3 = LWZtoc <ga:@min1>, %R2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LWZ, and the global address operand to be a
+ // reference to the GOT entry we will synthesize later.
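+    // The result is (sketch; rD/rGOTReg stand for the register operands):
+    //   lwz rD, (<TOC entry label> - .L.TOC.)(rGOTReg)
+    // where the entry itself is emitted into .got2 during doFinalization.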
+ TmpInst.setOpcode(PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
+ OutContext);
+ Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
case PPC::LDtocJTI:
case PPC::LDtocCPT:
case PPC::LDtoc: {
@@ -362,7 +408,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Map symbol -> label of TOC entry
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
@@ -376,7 +422,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
@@ -384,45 +430,42 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to ADDIS8. If the global address is external,
- // has common linkage, is a function address, or is a jump table
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
// address, then generate a TOC entry and reference that. Otherwise
// reference the symbol directly.
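+    // The two resulting shapes are (sketch; rD stands for the destination):
+    //   addis rD, r2, <sym>@toc@ha             (referenced directly)
+    //   addis rD, r2, <TOC entry label>@toc@ha (via a synthesized TOC entry)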
TmpInst.setOpcode(PPC::ADDIS8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
"Invalid operand for ADDIStocHA!");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- IsExternal = GVar && !GVar->hasInitializer();
- IsCommon = GVar && RealGValue->hasCommonLinkage();
- IsFunction = !GVar;
- IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsCommon = GV->hasCommonLinkage();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
+ IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
- TM.getCodeModel() == CodeModel::Large)
+ if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
+ MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::LDtocL: {
@@ -436,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
"Invalid operand for LDtocL!");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
@@ -447,14 +490,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
-
- if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage() ||
+ MOSymbol = getSymbol(GValue);
+ if (GValue->getType()->getElementType()->isFunctionTy() ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
@@ -463,7 +502,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::ADDItocL: {
@@ -476,30 +515,28 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(PPC::ADDI8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- IsExternal = GVar && !GVar->hasInitializer();
- IsFunction = !GVar;
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
+ if (IsNonLocalFunction || IsExternal ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
return;
}
case PPC::ADDISgotTprelHA: {
@@ -512,18 +549,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTprel));
return;
}
- case PPC::LDgotTprelL: {
+ case PPC::LDgotTprelL:
+ case PPC::LDgotTprelL32: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
    // Change the opcode to LD (or LWZ on PPC32).
- TmpInst.setOpcode(PPC::LD);
+ TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -531,7 +569,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::PPC32GOT: {
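+    // Materialize the address of _GLOBAL_OFFSET_TABLE_ into the destination
+    // register; a sketch of the pair emitted below (rD is operand 0):
+    //   li    rD, _GLOBAL_OFFSET_TABLE_@l
+    //   addis rD, rD, _GLOBAL_OFFSET_TABLE_@ha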
+    MCSymbol *GOTSymbol =
+      OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
+ OutContext);
+ const MCExpr *SymGotTlsHA =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
return;
}
case PPC::ADDIStlsgdHA: {
@@ -544,7 +600,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsGD));
@@ -560,7 +616,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
@@ -581,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -596,7 +652,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsLD));
@@ -612,7 +668,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
@@ -633,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -648,7 +704,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X3)
.addExpr(SymDtprel));
@@ -664,7 +720,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -679,7 +735,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
.addReg(MI->getOperand(0).getReg()));
return;
}
@@ -695,7 +751,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
->getEncodingValue(MI->getOperand(0).getReg());
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
.addImm(Mask)
.addReg(MI->getOperand(1).getReg()));
return;
@@ -723,13 +779,76 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- OutStreamer.EmitInstruction(TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget.isELFv2ABI()) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitAbiVersion(2);
+ }
+
+ if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ // FIXME: The use of .got2 assumes large GOT model (-fPIC), which is not
+ // optimal for some cases. We should consider supporting small model (-fpic)
+ // as well in the future.
+ assert(TM.getCodeModel() != CodeModel::Small &&
+ "Small code model PIC is currently unsupported.");
+ OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly()));
+
+ MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".L.TOC."));
+ MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+
+ OutStreamer.EmitLabel(CurrentPos);
+
+  // The GOT pointer points to the middle of the GOT so that signed 16-bit
+  // offsets can reference the entire 64kB range; 0x8000 is the midpoint.
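+  // For example, if the anchor label lands at address G, then
+  // .L.TOC. = G + 0x8000, and a signed 16-bit displacement from .L.TOC.
+  // spans exactly [G, G + 0x10000).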
+ const MCExpr *tocExpr =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
+ MCConstantExpr::Create(0x8000, OutContext),
+ OutContext);
+
+ OutStreamer.EmitAssignment(TOCSym, tocExpr);
+
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
- if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ // linux/ppc32 - Normal entry label.
+ if (!Subtarget.isPPC64() && TM.getRelocationModel() != Reloc::PIC_)
return AsmPrinter::EmitFunctionEntryLabel();
-
+
+ if (!Subtarget.isPPC64()) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ if (PPCFI->usesPICBase()) {
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ OutStreamer.EmitLabel(RelocSymbol);
+
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
+ OutContext),
+ MCSymbolRefExpr::Create(PICBase, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ return;
+ } else
+ return AsmPrinter::EmitFunctionEntryLabel();
+ }
+
+ // ELFv2 ABI - Normal entry label.
+ if (Subtarget.isELFv2ABI())
+ return AsmPrinter::EmitFunctionEntryLabel();
+
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@@ -766,10 +885,17 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
+ static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
- if (isPPC64 && !TOC.empty()) {
- const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ if (!TOC.empty()) {
+ const MCSectionELF *Section;
+
+ if (isPPC64)
+ Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ else
+ Section = OutStreamer.getContext().getELFSection(".got2",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
@@ -778,7 +904,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- TS.emitTCEntry(*S);
+ if (isPPC64)
+ TS.emitTCEntry(*S);
+ else
+ OutStreamer.EmitSymbolValue(S, 4);
}
}
@@ -804,6 +933,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+ // In the ELFv2 ABI, in functions that use the TOC register, we need to
+ // provide two entry points. The ABI guarantees that when calling the
+ // local entry point, r2 is set up by the caller to contain the TOC base
+ // for this function, and when calling the global entry point, r12 is set
+ // up by the caller to hold the address of the global entry point. We
+ // thus emit a prefix sequence along the following lines:
+ //
+ // func:
+ // # global entry point
+ // addis r2,r12,(.TOC.-func)@ha
+ // addi r2,r2,(.TOC.-func)@l
+ // .localentry func, .-func
+ // # local entry point, followed by function body
+ //
+ // This ensures we have r2 set up correctly while executing the function
+ // body, no matter which entry point is called.
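+  // (In ELF terms, .localentry records the distance between the two entry
+  // points, here two instructions (8 bytes), in the symbol's st_other field.)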
+ if (Subtarget.isELFv2ABI()
+ // Only do all that if the function uses r2 in the first place.
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+ MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(GlobalEntryLabel);
+ const MCSymbolRefExpr *GlobalEntryLabelExp =
+ MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+ MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+
+ MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(LocalEntryLabel);
+ const MCSymbolRefExpr *LocalEntryLabelExp =
+ MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+ const MCExpr *LocalOffsetExp =
+ MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+ GlobalEntryLabelExp, OutContext);
+
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+ }
+}
+
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
@@ -854,13 +1045,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
-
- // FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport()) {
- assert(Directive < array_lengthof(CPUDirectives) &&
- "CPUDirectives[] might not be up-to-date!");
- OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
- }
+
+ assert(Directive < array_lengthof(CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ TStreamer.emitMachine(CPUDirectives[Directive]);
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -870,14 +1060,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText()));
} else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
OutStreamer.SwitchSection(
OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText()));
}
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
@@ -909,8 +1099,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
if (TM.getRelocationModel() == Reloc::PIC_) {
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
32, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.SwitchSection(StubSection);
@@ -930,32 +1120,32 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// bcl 20, 31, AnonSymbol
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
// mflr r11
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
.addExpr(SubHa16));
// mtlr r0
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
@@ -977,8 +1167,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
const MCSection *StubSection =
OutContext.getMachOSection("__TEXT","__symbol_stub1",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
16, SectionKind::getText());
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
MCSymbol *Stub = Stubs[i].first;
@@ -994,7 +1184,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
@@ -1002,15 +1192,15 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// lwzu r12, lo16(LazyPtr)(r11)
const MCExpr *LazyPtrLo16 =
PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
.addReg(PPC::R11));
// mtctr r12
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
// bctr
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
@@ -1051,7 +1241,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
E = Personalities.end(); I != E; ++I) {
if (*I) {
- MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
@@ -1140,4 +1330,5 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
}
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 3e608ca8f679..ee906712ee02 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-branch-select"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
@@ -26,6 +25,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "ppc-branch-select"
+
STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace llvm {
@@ -42,9 +43,9 @@ namespace {
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC Branch Selector";
}
};
@@ -112,9 +113,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned MBBStartOffset = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- MachineBasicBlock *Dest = 0;
+ MachineBasicBlock *Dest = nullptr;
if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
Dest = I->getOperand(2).getMBB();
+ else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
+ !I->getOperand(1).isImm())
+ Dest = I->getOperand(1).getMBB();
else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
!I->getOperand(0).isImm())
@@ -166,6 +170,12 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
BuildMI(MBB, I, dl, TII->get(PPC::BCC))
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BC) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BCn) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ) {
BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
} else if (I->getOpcode() == PPC::BDNZ8) {
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index e419b9b40d8e..ec1e34d91f93 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -23,31 +23,29 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ctrloops"
-
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "PPCTargetMachine.h"
-#include "PPC.h"
#ifndef NDEBUG
#include "llvm/CodeGen/MachineDominators.h"
@@ -61,6 +59,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ctrloops"
+
#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
@@ -84,20 +84,20 @@ namespace {
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(0) {
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
@@ -109,7 +109,7 @@ namespace {
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
};
@@ -128,12 +128,12 @@ namespace {
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
MachineDominatorTree *MDT;
@@ -145,7 +145,7 @@ namespace {
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
@@ -170,8 +170,9 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
bool MadeChange = false;
@@ -188,7 +189,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- return ITy->getBitWidth() > (Is32Bit ? 32 : 64);
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
return false;
}
@@ -369,6 +370,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
// On PowerPC, indirect jumps use the counter register.
return true;
@@ -423,9 +432,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- BasicBlock *CountedExitBlock = 0;
- const SCEV *ExitCount = 0;
- BranchInst *CountedExitBranch = 0;
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
const SCEV *EC = SE->getExitCount(L, *I);
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index e8e7f4c2d226..222760a0cb91 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -15,6 +15,8 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
: CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+class CCIfNotSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
@@ -23,17 +25,24 @@ class CCIfSubtarget<string F, CCAction A>
// Return-value convention for PowerPC
def RetCC_PPC : CallingConv<[
// On PPC64, integer return values are always promoted to i64
- CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ // Floating point types returned as "direct" go into F1 .. F8; note that
+ // only the ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
-
- // Vector types are always returned in V2.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+ // Vector types returned as "direct" go into V2 .. V9; note that only the
+ // ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
@@ -46,6 +55,7 @@ def RetCC_PPC : CallingConv<[
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
@@ -58,14 +68,18 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
CCIfType<[i8], CCPromoteToType<i64>>,
CCIfType<[i16], CCPromoteToType<i64>>,
CCIfType<[i32], CCPromoteToType<i64>>,
CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
//===----------------------------------------------------------------------===//
@@ -73,6 +87,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_PPC32_SVR4_Common : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
@@ -97,7 +113,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
// This calling convention puts vector arguments always on the stack. It is used
@@ -113,6 +129,9 @@ def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
+ VSH10, VSH11, VSH12, VSH13]>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 418736e21e86..08755238f925 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -32,7 +32,7 @@ namespace {
JITCodeEmitter &MCE;
MachineModuleInfo *MMI;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -73,11 +73,13 @@ namespace {
unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
- const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+ const char *getPassName() const override {
+ return "PowerPC Machine Code Emitter";
+ }
/// runOnMachineFunction - emits the given MachineFunction to memory
///
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
/// emitBasicBlock - emits the given MachineBasicBlock to memory
///
@@ -102,7 +104,7 @@ bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
do {
- MovePCtoLROffset = 0;
+ MovePCtoLROffset = nullptr;
MCE.startFunction(MF);
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
emitBasicBlock(*BB);
@@ -121,7 +123,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
break;
- case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ break;
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 4e3b0b83244a..2e524d604789 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -13,12 +13,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppcfastisel"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCISelLowering.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
@@ -28,12 +27,12 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -58,6 +57,8 @@
//===----------------------------------------------------------------------===//
using namespace llvm;
+#define DEBUG_TYPE "ppcfastisel"
+
namespace {
typedef struct Address {
@@ -80,12 +81,12 @@ typedef struct Address {
}
} Address;
-class PPCFastISel : public FastISel {
+class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget *PPCSubTarget;
LLVMContext *Context;
public:
@@ -95,31 +96,29 @@ class PPCFastISel : public FastISel {
TM(FuncInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- PPCSubTarget(
- *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
- ),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
Context(&FuncInfo.Fn->getContext()) { }
// Backend specific FastISel code.
private:
- virtual bool TargetSelectInstruction(const Instruction *I);
- virtual unsigned TargetMaterializeConstant(const Constant *C);
- virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
- virtual bool FastLowerArguments();
- virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
- virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill);
- virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
+ bool TargetSelectInstruction(const Instruction *I) override;
+ unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+ bool FastLowerArguments() override;
+ unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
// Instruction selection routines.
private:
@@ -127,7 +126,6 @@ class PPCFastISel : public FastISel {
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
bool SelectIndirectBr(const Instruction *I);
- bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
bool SelectIToFP(const Instruction *I, bool IsSigned);
@@ -283,7 +281,7 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
@@ -325,11 +323,11 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
II != IE; ++II, ++GTI) {
const Value *Op = *II;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
- uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
@@ -407,7 +405,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
// register and continue. This should almost never happen.
if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
Addr.Base.Reg = ResultReg;
Addr.BaseType = Address::RegBase;
@@ -499,13 +497,13 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
@@ -529,7 +527,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
case PPC::LFS: Opc = PPC::LFSX; break;
case PPC::LFD: Opc = PPC::LFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Addr.Base.Reg).addReg(IndexReg);
}
@@ -557,7 +555,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
// to constrain RA from using R0/X0 when this is not legal.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
- AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
@@ -615,12 +613,15 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
- .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg)
+ .addImm(Addr.Offset)
+ .addFrameIndex(Addr.Base.FI)
+ .addMemOperand(MMO);
// Base reg with offset in range.
} else if (UseOffset)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
// Indexed form.
@@ -640,7 +641,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
}
@@ -704,9 +705,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
CondReg))
return false;
- BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- FastEmitBranch(FBB, DL);
+ FastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -714,7 +715,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DL);
+ FastEmitBranch(Target, DbgLoc);
return true;
}
@@ -737,6 +738,9 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
+ if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
+ return false;
+
// See if operand 2 is an immediate encodeable in the compare.
// FIXME: Operands are not in canonical order at -O0, so an immediate
// operand in position 1 is a lost opportunity for now. We are
@@ -811,10 +815,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
if (!UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addReg(SrcReg2);
else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
.addReg(SrcReg1).addImm(Imm);
return true;
@@ -853,7 +857,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
UpdateValueMap(I, DestReg);
@@ -895,7 +899,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
Addr.Offset = 4;
- } else if (PPCSubTarget.hasLFIWAX()) {
+ } else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
Addr.Offset = 4;
}
@@ -936,7 +940,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
- if (!IsSigned && !PPCSubTarget.hasFPCVT())
+ if (!IsSigned && !PPCSubTarget->hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
@@ -944,7 +948,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
- if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
return false;
// Extend the input if necessary.
@@ -972,7 +976,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
// Generate the convert.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
UpdateValueMap(I, DestReg);
@@ -1007,7 +1011,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// to determine the required register class.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
- AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
@@ -1026,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
@@ -1044,7 +1052,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
if (InRC == &PPC::F4RCRegClass) {
unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
.addReg(SrcReg).addImm(PPC::F8RCRegClassID);
SrcReg = TmpReg;
@@ -1059,12 +1067,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IsSigned)
Opc = PPC::FCTIWZ;
else
- Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
// Generate the convert.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
@@ -1157,8 +1165,10 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
if (UseImm) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
- .addReg(SrcReg1).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg)
+ .addReg(SrcReg1)
+ .addImm(Imm);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1173,7 +1183,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
if (ISDOpcode == ISD::SUB)
std::swap(SrcReg1, SrcReg2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
UpdateValueMap(I, ResultReg);
return true;
@@ -1191,6 +1201,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+ CCInfo.AllocateStack(LinkageSize, 8);
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
@@ -1200,7 +1217,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Skip vector arguments for now, as well as long double and
// uint128_t, and anything that isn't passed in a register.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
!VA.isRegLoc() || VA.needsCustom())
return false;
@@ -1212,8 +1229,16 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if the callee
+ // is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
// Issue CALLSEQ_START.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
.addImm(NumBytes);
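Concretely, the std::max above pins the outgoing frame at the linkage area plus eight doublewords of GPR home space; a sketch of the numbers, using the sizes getLinkageSize returns for the 64-bit ELF ABIs:

    unsigned MinFrameELFv1 = 48 + 8 * 8; // 112 bytes
    unsigned MinFrameELFv2 = 32 + 8 * 8; //  96 bytes (see the FIXME above;
                                         //  the parameter area may be droppable)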
@@ -1272,9 +1297,9 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
++NextGPR;
} else
ArgReg = NextGPR++;
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ArgReg).addReg(Arg);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
RegArgs.push_back(ArgReg);
}
@@ -1287,7 +1312,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes, bool IsVarArg) {
// Issue CALLSEQ_END.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameDestroyOpcode()))
.addImm(NumBytes).addImm(0);
@@ -1317,14 +1342,14 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
ResultReg = createResultReg(CpyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
ResultReg).addReg(SourcePhysReg);
// If only the low half of a general register is needed, generate
@@ -1335,7 +1360,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(SourcePhysReg);
}
@@ -1442,7 +1467,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
if (Arg == 0)
return false;
- unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
Args.push_back(*II);
@@ -1467,7 +1492,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
// Build direct call with NOP for TOC restore.
// FIXME: We can and should optimize away the NOP for local calls.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BL8_NOP));
// Add callee.
MIB.addGlobalAddress(GV);
@@ -1476,6 +1501,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (PPCSubTarget->isELFv2ABI())
+ MIB.addReg(PPC::X2, RegState::Implicit);
+
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
@@ -1524,8 +1553,8 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
const Constant *C = cast<Constant>(RV);
unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
unsigned RetReg = ValLocs[0].getLocReg();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- RetReg).addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
RetRegs.push_back(RetReg);
} else {
@@ -1580,14 +1609,14 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
}
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetRegs[i])
.addReg(SrcReg);
}
}
}
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::BLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
@@ -1617,7 +1646,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
Opc = PPC::EXTSW_32_64;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
// Unsigned 32-bit extensions use RLWINM.
@@ -1629,7 +1658,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
MB = 16;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
@@ -1642,7 +1671,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
MB = 48;
else
MB = 32;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::RLDICL_32_64), DestReg)
.addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
}
@@ -1656,9 +1685,9 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
if (AddrReg == 0)
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
.addReg(AddrReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
@@ -1686,7 +1715,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
// The only interesting case is when we need to switch register classes.
if (SrcVT == MVT::i64) {
unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
ResultReg).addReg(SrcReg, 0, PPC::sub_32);
SrcReg = ResultReg;
}
@@ -1793,7 +1823,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
return 0;
// All FP constants are loaded from the constant pool.
- unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
assert(Align > 0 && "Unexpectedly missing alignment information!");
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -1809,25 +1839,25 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
TmpReg)
.addConstantPoolIndex(Idx).addReg(PPC::X2);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg).addMemOperand(MMO);
} else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
if (CModel == CodeModel::Large) {
unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addImm(0).addReg(TmpReg2);
} else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
.addReg(TmpReg)
.addMemOperand(MMO);
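For reference, the three TOC-addressing shapes this function can emit, written out as a pseudo-assembly sketch in comments (X2 is assumed to hold the TOC pointer, Idx the constant-pool index):

    // Small/JITDefault: ld     Tmp,  Idx@toc(X2)         (LDtocCPT)
    //                   lf[sd] Dest, 0(Tmp)
    // Default (medium): addis  Tmp,  X2, Idx@toc@ha      (ADDIStocHA)
    //                   lf[sd] Dest, Idx@toc@l(Tmp)
    // Large:            addis  Tmp,  X2, Idx@toc@ha
    //                   ld     Tmp2, Idx@toc@l(Tmp)      (LDtocL)
    //                   lf[sd] Dest, 0(Tmp2)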
@@ -1851,25 +1881,20 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias, use the aliasee for determining thread-locality.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
- }
// FIXME: We don't yet handle the complexity of TLS.
- bool IsTLS = GVar && GVar->isThreadLocal();
- if (IsTLS)
+ if (GV->isThreadLocal())
return 0;
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
- .addGlobalAddress(GV).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
+ DestReg)
+ .addGlobalAddress(GV)
+ .addReg(PPC::X2);
else {
- // If the address is an externally defined symbol, a symbol with
- // common or externally available linkage, a function address, or a
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%X2, GV))
@@ -1877,20 +1902,21 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// ADDItocL(ADDIStocHA(%X2, GV), GV)
// Either way, start with the ADDIStocHA:
unsigned HighPartReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // !GVar implies a function address. An external variable is one
- // without an initializer.
// If/when switches are implemented, jump tables should be handled
// on the "if" path here.
- if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
- GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ if (CModel == CodeModel::Large ||
+ (GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage())
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
// Otherwise generate the ADDItocL.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
@@ -1908,21 +1934,21 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
if (isInt<16>(Imm))
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
.addImm(Imm);
else if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
.addImm(Hi);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
.addReg(TmpReg).addImm(Lo);
} else
// Just Hi bits.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
.addImm(Hi);
@@ -1962,7 +1988,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
unsigned TmpReg2;
if (Imm) {
TmpReg2 = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
} else
TmpReg2 = TmpReg1;
@@ -1970,14 +1996,14 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
unsigned TmpReg3, Hi, Lo;
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
TmpReg3 = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
TmpReg3).addReg(TmpReg2).addImm(Hi);
} else
TmpReg3 = TmpReg2;
if ((Lo = Remainder & 0xFFFF)) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
ResultReg).addReg(TmpReg3).addImm(Lo);
return ResultReg;
}
@@ -1989,6 +2015,15 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
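A sketch of the CR-bit special case added above; CRSET and CRUNSET are the pseudo-ops corresponding to the extended mnemonics that force a single condition-register bit to 1 or 0 (IsTrue is a hypothetical flag standing in for !CI->isZero()):

    // Materialize an i1 constant directly into a CR bit register.
    unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(IsTrue ? PPC::CRSET : PPC::CRUNSET), ImmReg);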
@@ -2002,7 +2037,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
if (isInt<16>(CI->getSExtValue())) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
.addImm(CI->getSExtValue());
return ImmReg;
}
@@ -2051,7 +2086,7 @@ unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(SI->second).addImm(0);
return ResultReg;
}
@@ -2130,7 +2165,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
+ if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
return false;
MI->eraseFromParent();
@@ -2154,6 +2189,15 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -2233,6 +2277,6 @@ namespace llvm {
if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
- return 0;
+ return nullptr;
}
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0ac2ceddcc9e..b2577a9c7cf7 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ Subtarget(STI) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
+
/// RemoveVRSaveCode - We have found that this function does not need any code
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
@@ -222,7 +384,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
- FrameSize == 0) &&
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -236,9 +398,10 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
- // Maximum call frame needs to be at least big enough for linkage and 8 args.
- unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+ // Maximum call frame needs to be at least big enough for linkage area.
+ unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI(),
+ Subtarget.isELFv2ABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -281,8 +444,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -299,7 +462,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg;
+ unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -344,18 +507,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
DebugLoc dl;
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
assert((isDarwinABI || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
- // Prepare for frame info.
- MCSymbol *FrameLabel = 0;
-
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
if (!isSVR4ABI)
@@ -387,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
@@ -429,7 +591,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -442,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -463,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
"Prologue CR saving supported only in 64-bit mode");
if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it could be more
+ // efficient to use mfocrf to selectively save just those fields.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
@@ -561,36 +729,38 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Add the "machine moves" for the instructions we generated above, but in
// reverse order.
if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
-
// Show update of SP.
assert(NegFrameSize);
- MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
if (HasFP) {
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
if (HasBP) {
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
if (MustSaveLR) {
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
- MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
- MCSymbol *ReadyLabel = 0;
-
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
BuildMI(MBB, MBBI, dl, OrInst, FPReg)
@@ -598,19 +768,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(SPReg);
if (needsFrameMoves) {
- ReadyLabel = MMI.getContext().CreateTempSymbol();
-
// Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
-
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
if (needsFrameMoves) {
- MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
-
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
@@ -631,14 +799,22 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
- MMI.addFrameInst(MCCFIInstruction::createOffset(
- Label, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+ // the whole CR word. In the ELFv2 ABI, every CR that was
+ // actually saved gets its own CFI record.
+ unsigned CRReg = isELFv2ABI ? Reg : (unsigned) PPC::CR2;
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- MMI.addFrameInst(MCCFIInstruction::createOffset(
- Label, MRI->getDwarfRegNum(Reg, true), Offset));
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
}
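The prologue now records unwind information through frame-instruction indices attached to CFI_INSTRUCTION markers rather than PROLOG_LABEL temp symbols; the recurring idiom, as a minimal sketch:

    // Record the save slot, then anchor it in the instruction stream.
    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);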
@@ -675,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -685,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -712,7 +889,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -725,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -902,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -916,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int BPSI = FI->getBasePointerSaveIndex();
if (!BPSI && RegInfo->hasBasePointer(MF)) {
- int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
// Allocate the frame index for the base pointer save area.
BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
@@ -930,9 +1111,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
- if (!isPPC64 && !isDarwinABI &&
+ if (!isPPC64 && !isDarwinABI &&
(MRI.isPhysRegUsed(PPC::CR2) ||
MRI.isPhysRegUsed(PPC::CR3) ||
MRI.isPhysRegUsed(PPC::CR4))) {
@@ -1106,10 +1287,10 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
unsigned Reg = CSI[i].getReg();
if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
- // Leave Darwin logic as-is.
- || (!Subtarget.isSVR4ABI() &&
- (PPC::CRBITRCRegClass.contains(Reg) ||
- PPC::CRRCRegClass.contains(Reg)))) {
+ // Leave Darwin logic as-is.
+ || (!Subtarget.isSVR4ABI() &&
+ (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)))) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1190,11 +1371,11 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
-bool
+bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
// Currently, this function only handles SVR4 32- and 64-bit ABIs.
// Return false otherwise to maintain pre-existing behavior.
@@ -1207,7 +1388,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
-
+
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
// Only Darwin actually uses the VRSAVE register, but it can still appear
@@ -1237,21 +1418,21 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CRSpilled = true;
FuncInfo->setSpillsCR();
- // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
- // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
- CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+ // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
+ // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
.addReg(Reg, RegState::ImplicitKill);
- MBB.insert(MI, CRMIB);
- MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
- .addReg(PPC::R12,
- getKillRegState(true)),
- CSI[i].getFrameIdx()));
+ MBB.insert(MI, CRMIB);
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+ .addReg(PPC::R12,
+ getKillRegState(true)),
+ CSI[i].getFrameIdx()));
}
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(MBB, MI, Reg, true,
- CSI[i].getFrameIdx(), RC, TRI);
+ CSI[i].getFrameIdx(), RC, TRI);
}
}
return true;
@@ -1260,8 +1441,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
static void
restoreCRs(bool isPPC64, bool is31,
bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
@@ -1275,12 +1456,12 @@ restoreCRs(bool isPPC64, bool is31,
else {
// 32-bit: FP-relative
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
- PPC::R12),
- CSI[CSIIndex].getFrameIdx()));
+ PPC::R12),
+ CSI[CSIIndex].getFrameIdx()));
RestoreOp = PPC::MTOCRF;
MoveReg = PPC::R12;
}
-
+
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
@@ -1335,11 +1516,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-bool
+bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
// Currently, this function only handles SVR4 32- and 64-bit ABIs.
// Return false otherwise to maintain pre-existing behavior.
@@ -1387,20 +1568,20 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// When we first encounter a non-CR register after seeing at
// least one CR register, restore all spilled CRs together.
if ((CR2Spilled || CR3Spilled || CR4Spilled)
- && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
bool is31 = needsFP(*MF);
restoreCRs(Subtarget.isPPC64(), is31,
CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
- CR2Spilled = CR3Spilled = CR4Spilled = false;
+ MBB, I, CSI, CSIIndex);
+ CR2Spilled = CR3Spilled = CR4Spilled = false;
}
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
+ RC, TRI);
assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ "loadRegFromStackSlot didn't insert any code!");
}
// Insert in reverse order.
@@ -1409,16 +1590,15 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
else {
I = BeforeI;
++I;
- }
+ }
}
// If we haven't yet spilled the CRs, do so now.
if (CR2Spilled || CR3Spilled || CR4Spilled) {
- bool is31 = needsFP(*MF);
+ bool is31 = needsFP(*MF);
restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ MBB, I, CSI, CSIIndex);
}
return true;
}
-
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 7aab37e188fe..c0c7d248f8d2 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -14,23 +14,18 @@
#define POWERPC_FRAMEINFO_H
#include "PPC.h"
-#include "PPCSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class PPCSubtarget;
+class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
public:
- PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
- Subtarget(sti) {
- }
+ PPCFrameLowering(const PPCSubtarget &STI);
unsigned determineFrameLayout(MachineFunction &MF,
bool UpdateMF = true,
@@ -38,37 +33,37 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- bool hasFP(const MachineFunction &MF) const;
+ bool hasFP(const MachineFunction &MF) const override;
bool needsFP(const MachineFunction &MF) const;
void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
/// time).
- bool targetHandlesStackFrameRounding() const { return true; }
+ bool targetHandlesStackFrameRounding() const override { return true; }
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
@@ -79,6 +74,12 @@ public:
return isPPC64 ? 16 : 4;
}
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ static unsigned getTOCSaveOffset(bool isELFv2ABI) {
+ return isELFv2ABI ? 24 : 40;
+ }
+
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
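The 24/40 split follows from the shrunken ELFv2 linkage area; a sketch of the caller's frame header under both 64-bit ELF ABIs (offsets from the stack pointer, layout per the respective ABI documents):

    // ELFv1 (48-byte linkage area)      ELFv2 (32-byte linkage area)
    // SP+0   back chain                 SP+0   back chain
    // SP+8   CR save                    SP+8   CR save
    // SP+16  LR save                    SP+16  LR save
    // SP+24  reserved (compiler)        SP+24  TOC save
    // SP+32  reserved (linker)
    // SP+40  TOC save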
@@ -96,208 +97,30 @@ public:
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ static unsigned getBasePointerSaveOffset(bool isPPC64,
+ bool isDarwinABI,
+ bool isPIC) {
if (isDarwinABI)
return isPPC64 ? -16U : -8U;
// SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -16U : -8U;
+ return isPPC64 ? -16U : isPIC ? -12U : -8U;
}
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
+ bool isELFv2ABI) {
if (isDarwinABI || isPPC64)
- return 6 * (isPPC64 ? 8 : 4);
+ return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
// SVR4 ABI:
return 8;
}
- /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
- /// argument area.
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI / 64-bit SVR4 ABI:
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI || isPPC64)
- return 8 * (isPPC64 ? 8 : 4);
-
- // 32-bit SVR4 ABI:
- // There is no default stack allocated for the 8 first GPR arguments.
- return 0;
- }
-
- /// getMinCallFrameSize - Return the minimum size a call frame can be using
- /// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
- // The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(isPPC64, isDarwinABI) +
- getMinCallArgumentsSize(isPPC64, isDarwinABI);
- }
-
- // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
-
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return 0;
- }
-
- // Note that the offsets here overlap, but this is fixed up in
- // processFunctionBeforeFrameFinalized.
-
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
-
- // CR save area offset. We map each of the nonvolatile CR fields
- // to the slot for CR2, which is the first of the nonvolatile CR
- // fields to be assigned, so that we only allocate one save slot.
- // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
- {PPC::CR2, -4},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
-
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
-
- return Offsets;
- }
- }
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-
} // End llvm namespace
#endif
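As a quick sanity check on the widened getLinkageSize signature, the values it now produces (a sketch; each follows directly from the arithmetic above):

    assert(PPCFrameLowering::getLinkageSize(true,  false, false) == 48); // 64-bit ELFv1: 6*8
    assert(PPCFrameLowering::getLinkageSize(true,  false, true)  == 32); // 64-bit ELFv2: 4*8
    assert(PPCFrameLowering::getLinkageSize(false, true,  false) == 24); // 32-bit Darwin: 6*4
    assert(PPCFrameLowering::getLinkageSize(false, false, false) ==  8); // 32-bit SVR4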
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 0df50e17dd9d..d9b242cad265 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -11,38 +11,226 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "PPCHazardRecognizers.h"
#include "PPC.h"
#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-//===----------------------------------------------------------------------===//
-// PowerPC Scoreboard Hazard Recognizer
-void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+#define DEBUG_TYPE "pre-RA-sched"
+
+bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
+ // FIXME: Move this.
+ if (isBCTRAfterSet(SU))
+ return true;
+
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID)
- // This is a PPC pseudo-instruction.
- return;
+ return false;
+
+ if (!MCID->mayLoad())
+ return false;
+
+ // SU is a load; return true if any predecessor in this dispatch group is a
+ // store with which we have an ordering dependency.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || !PredMCID->mayStore())
+ continue;
+
+ if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->isBranch())
+ return false;
+
+ // SU is a branch; return true if any predecessor in this dispatch group
+ // sets the counter register and feeds SU through a data dependence.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
+ continue;
+
+ if (SU->Preds[i].isCtrl())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
- ScoreboardHazardRecognizer::EmitInstruction(SU);
+ return false;
+}
+
+// FIXME: Remove this when we don't need this:
+namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
+
+// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
+
+bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
+ unsigned &NSlots) {
+ // FIXME: Indirectly, this information is contained in the itinerary, and
+ // we should derive it from there instead of separately specifying it
+ // here.
+ unsigned IIC = MCID->getSchedClass();
+ switch (IIC) {
+ default:
+ NSlots = 1;
+ break;
+ case PPC::Sched::IIC_IntDivW:
+ case PPC::Sched::IIC_IntDivD:
+ case PPC::Sched::IIC_LdStLoadUpd:
+ case PPC::Sched::IIC_LdStLDU:
+ case PPC::Sched::IIC_LdStLFDU:
+ case PPC::Sched::IIC_LdStLFDUX:
+ case PPC::Sched::IIC_LdStLHA:
+ case PPC::Sched::IIC_LdStLHAU:
+ case PPC::Sched::IIC_LdStLWA:
+ case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTFDU:
+ NSlots = 2;
+ break;
+ case PPC::Sched::IIC_LdStLoadUpdX:
+ case PPC::Sched::IIC_LdStLDUX:
+ case PPC::Sched::IIC_LdStLHAUX:
+ case PPC::Sched::IIC_LdStLWARX:
+ case PPC::Sched::IIC_LdStLDARX:
+ case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTDCX:
+ case PPC::Sched::IIC_LdStSTWCX:
+ case PPC::Sched::IIC_BrMCRX: // mtcr
+ // FIXME: Add sync/isync (here and in the itinerary).
+ NSlots = 4;
+ break;
+ }
+
+ // FIXME: record-form instructions need a different itinerary class.
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
+ NSlots = 2;
+
+ switch (IIC) {
+ default:
+ // All multi-slot instructions must come first.
+ return NSlots > 1;
+ case PPC::Sched::IIC_BrCR: // cr logicals
+ case PPC::Sched::IIC_SprMFCR:
+ case PPC::Sched::IIC_SprMFCRF:
+ case PPC::Sched::IIC_SprMTSPR:
+ return true;
+ }
}
ScheduleHazardRecognizer::HazardType
-PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (Stalls == 0 && isLoadAfterStore(SU))
+ return NoopHazard;
+
return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
}
-void PPCScoreboardHazardRecognizer::AdvanceCycle() {
- ScoreboardHazardRecognizer::AdvanceCycle();
+bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ unsigned NSlots;
+ if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
+ return true;
+
+ return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
+}
+
+unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ // We only need to fill out a maximum of 5 slots here: The 6th slot could
+ // only be a second branch, and otherwise the next instruction will start a
+ // new group.
+ if (isLoadAfterStore(SU) && CurSlots < 6) {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If we're using a special group-terminating nop, then we need only one.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8)
+ return 1;
+
+ return 5 - CurSlots;
+ }
+
+ return ScoreboardHazardRecognizer::PreEmitNoops(SU);
}
-void PPCScoreboardHazardRecognizer::Reset() {
- ScoreboardHazardRecognizer::Reset();
+void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID) {
+ if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
+ SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+
+ unsigned NSlots;
+ bool MustBeFirst = mustComeFirst(MCID, NSlots);
+
+ // If this instruction must come first, but does not, then it starts a
+ // new group.
+ if (MustBeFirst && CurSlots) {
+ CurSlots = CurBranches = 0;
+ CurGroup.clear();
+ }
+
+ CurSlots += NSlots;
+ CurGroup.push_back(SU);
+
+ if (MCID->isBranch())
+ ++CurBranches;
+ }
+ }
+
+ return ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
+ return ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("Bottom-up scheduling not supported");
+}
+
+void PPCDispatchGroupSBHazardRecognizer::Reset() {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ return ScoreboardHazardRecognizer::Reset();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If the group has now filled all of its slots, or if we're using a special
+ // group-terminating nop, the group is complete.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ CurGroup.push_back(nullptr);
+ ++CurSlots;
+ }
}
//===----------------------------------------------------------------------===//
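To make the slot accounting concrete, consider a store dispatched into slot 0 with a dependent load up next. A sketch of what PreEmitNoops then computes (UsesGroupEndingNop is a hypothetical predicate for the PWR6/PWR7/PWR8 directives):

    // CurSlots == 1, so on older cores 5 - CurSlots == 4 plain nops are
    // emitted, pushing the load into a fresh dispatch group; PWR6/7/8 can
    // instead terminate the group with a single special group-ending nop.
    unsigned NopsNeeded = UsesGroupEndingNop ? 1 : 5 - CurSlots;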
@@ -71,8 +259,8 @@ void PPCScoreboardHazardRecognizer::Reset() {
// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
//
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
- : TM(TM) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
EndDispatchGroup();
}
@@ -91,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
isLoad = MCID.mayLoad();
isStore = MCID.mayStore();
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 84b8e6de4579..23f76c16d138 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -21,19 +21,30 @@
namespace llvm {
-/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
-/// hazard recognizer for generic PPC processors.
-class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
+/// PPCDispatchGroupSBHazardRecognizer - This class implements a
+/// scoreboard-based hazard recognizer for PPC out-of-order processors with
+/// dispatch-group hazards.
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
const ScheduleDAG *DAG;
+ SmallVector<SUnit *, 7> CurGroup;
+ unsigned CurSlots, CurBranches;
+
+ bool isLoadAfterStore(SUnit *SU);
+ bool isBCTRAfterSet(SUnit *SU);
+ bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
public:
- PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+ PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG_) :
- ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
-
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void Reset();
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
+ CurSlots(0), CurBranches(0) {}
+
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ bool ShouldPreferAnother(SUnit* SU) override;
+ unsigned PreEmitNoops(SUnit *SU) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void Reset() override;
+ void EmitNoop() override;
};
/// PPCHazardRecognizer970 - This class defines a finite state automata that
@@ -43,7 +54,7 @@ public:
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
- const TargetMachine &TM;
+ const ScheduleDAG &DAG;
unsigned NumIssued; // Number of insts issued, including advanced cycles.
@@ -64,11 +75,11 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
unsigned NumStores;
public:
- PPCHazardRecognizer970(const TargetMachine &TM);
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void Reset();
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
+ virtual void EmitInstruction(SUnit *SU) override;
+ virtual void AdvanceCycle() override;
+ virtual void Reset() override;
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index d25762a5bbca..536c2824fb7a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -27,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -34,6 +35,12 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+#define DEBUG_TYPE "ppc-codegen"
+
+// FIXME: Remove this once the bug has been fixed!
+cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
+cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -45,29 +52,31 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCTargetLowering &PPCLowering;
- const PPCSubtarget &PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
+ const PPCSubtarget *PPCSubTarget;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
- PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {
+ PPCLowering(TM.getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
+ PPCLowering = TM.getTargetLowering();
+ PPCSubTarget = TM.getSubtargetImpl();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!PPCSubTarget.isSVR4ABI())
+ if (!PPCSubTarget->isSVR4ABI())
InsertVRSaveCode(MF);
return true;
}
- virtual void PostprocessISelDAG();
+ void PostprocessISelDAG() override;
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
@@ -83,7 +92,7 @@ namespace {
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
}
/// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
@@ -104,7 +113,7 @@ namespace {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
SDNode *SelectBitfieldInsert(SDNode *N);
@@ -116,7 +125,7 @@ namespace {
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -136,20 +145,20 @@ namespace {
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
}
// Select an address into a single register.
@@ -163,16 +172,16 @@ namespace {
/// a register. The case of adding a (possibly relocatable) constant to a
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override {
OutOps.push_back(Op);
return false;
}
void InsertVRSaveCode(MachineFunction &MF);
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -181,6 +190,12 @@ namespace {
private:
SDNode *SelectSETCC(SDNode *N);
+
+ void PeepholePPC64();
+ void PeepholeCROps();
+
+ bool AllUsersSelectZero(SDNode *N);
+ void SwapAllSelectUsers(SDNode *N);
};
}
@@ -260,10 +275,22 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc dl;
- if (PPCLowering.getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ if (PPCLowering->getPointerTy() == MVT::i32) {
+ if (PPCSubTarget->isTargetELF())
+ GlobalBaseReg = PPC::R30;
+ else
+ GlobalBaseReg =
+ RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ if (PPCSubTarget->isTargetELF()) {
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::GetGBRO), TempReg).addReg(GlobalBaseReg);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg).addReg(TempReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
} else {
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
@@ -271,7 +298,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
}
return CurDAG->getRegister(GlobalBaseReg,
- PPCLowering.getPointerTy()).getNode();
+ PPCLowering->getPointerTy()).getNode();
}
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
@@ -403,8 +430,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SDLoc dl(N);
APInt LKZ, LKO, RKZ, RKO;
- CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
- CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
+ CurDAG->computeKnownBits(Op0, LKZ, LKO);
+ CurDAG->computeKnownBits(Op1, RKZ, RKO);
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
@@ -447,11 +474,18 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
}
if (Op1Opc == ISD::AND) {
+ // The AND mask might not be a constant, and we need to make sure that
+ // if we're going to fold the masking with the insert, all bits not
+ // known to be zero in the mask are known to be one.
+ APInt MKZ, MKO;
+ CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
+ bool CanFoldMask = InsertMask == MKO.getZExtValue();
+
unsigned SHOpc = Op1.getOperand(0).getOpcode();
- if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
- // Note that Value must be in range here (less than 32) because
- // otherwise there would not be any bits set in InsertMask.
+ // Note that Value must be in range here (less than 32) because
+ // otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
}
@@ -463,7 +497,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
- return 0;
+ return nullptr;
}
/// SelectCC - Select a comparison of the specified values with the specified
@@ -561,7 +595,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
- Opc = PPC::FCMPUD;
+ Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
@@ -629,85 +663,108 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETUEQ:
- case ISD::SETNE:
- case ISD::SETUNE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPEQUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPEQUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPEQUW;
- // v4f32 != v4f32 could be translate to unordered not equal
- else if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
- break;
- case ISD::SETLT:
- case ISD::SETGT:
- case ISD::SETLE:
- case ISD::SETGE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTSB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTSH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTSW;
- else if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
- break;
- case ISD::SETULT:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTUW;
- break;
- case ISD::SETOEQ:
- if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
- break;
- case ISD::SETOLT:
- case ISD::SETOGT:
- case ISD::SETOLE:
- if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
- break;
- case ISD::SETOGE:
- if (VecVT == MVT::v4f32)
- return PPC::VCMPGEFP;
- break;
- default:
- break;
- }
- llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
- switch (VecVT) {
- case MVT::v16i8:
- return PPC::VCMPEQUB;
- case MVT::v8i16:
- return PPC::VCMPEQUH;
- case MVT::v4i32:
- return PPC::VCMPEQUW;
- case MVT::v4f32:
- return PPC::VCMPEQFP;
- default:
- llvm_unreachable("Invalid integer vector compare condition");
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
}
}
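The rewritten getVCmpInst reduces every condition code to the few compares the hardware actually provides by optionally swapping the operands and/or negating the lane-wise result. Below is a standalone sketch of the integer half of that canonicalization; the enum and helper names are invented for illustration, and the floating-point half follows the same pattern with the ordered/unordered codes.

#include <cassert>
#include <utility>

enum Cond { LT, LE, GT, GE, EQ, NE };

// Reduce Cond to EQ or GT, reporting whether to swap the operands and/or
// negate the result -- mirroring the Swap/Negate outputs above.
static Cond canonicalize(Cond CC, bool &Swap, bool &Negate) {
  Swap = Negate = false;
  switch (CC) {
  case GE: CC = LE; Swap = true; break;   // x >= y  ==  y <= x
  case LT: CC = GT; Swap = true; break;   // x <  y  ==  y >  x
  default: break;
  }
  switch (CC) {
  case NE: CC = EQ; Negate = true; break; // x != y  ==  !(x == y)
  case LE: CC = GT; Negate = true; break; // x <= y  ==  !(x >  y)
  default: break;
  }
  return CC;                              // now EQ or GT only
}

static bool apply(Cond CC, int A, int B) {
  bool Swap, Negate;
  Cond C = canonicalize(CC, Swap, Negate);
  if (Swap) std::swap(A, B);
  bool R = (C == EQ) ? (A == B) : (A > B);
  return Negate ? !R : R;
}

int main() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      assert(apply(EQ, A, B) == (A == B));
      assert(apply(NE, A, B) == (A != B));
      assert(apply(LT, A, B) == (A < B));
      assert(apply(LE, A, B) == (A <= B));
      assert(apply(GT, A, B) == (A > B));
      assert(apply(GE, A, B) == (A >= B));
    }
  return 0;
}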
-
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
@@ -715,7 +772,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- if (isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!PPCSubTarget->useCRBits() &&
+ isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
@@ -726,7 +784,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETEQ: {
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETNE: {
if (isPPC64) break;
@@ -738,14 +796,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
case ISD::SETLT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
}
} else if (Imm == ~0U) { // setcc op, -1
@@ -775,7 +833,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
@@ -795,56 +853,25 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
- MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC);
-
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETNE:
- case ISD::SETONE:
- case ISD::SETUNE: {
- SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
- }
- case ISD::SETLT:
- case ISD::SETOLT:
- case ISD::SETULT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
- case ISD::SETGT:
- case ISD::SETOGT:
- case ISD::SETUGT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETGE:
- case ISD::SETOGE:
- case ISD::SETUGE: {
- // Small optimization: Altivec provides a 'Vector Compare Greater Than
- // or Equal To' instruction (vcmpgefp), so in this case there is no
- // need for extra logic for the equal compare.
- if (VecVT.getSimpleVT().isFloatingPoint()) {
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- } else {
- SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
- }
- }
- case ISD::SETLE:
- case ISD::SETOLE:
- case ISD::SETULE: {
- SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
- }
- default:
- llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
}
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
}
+ if (PPCSubTarget->useCRBits())
+ return nullptr;
+
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
@@ -853,7 +880,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
- SDValue InFlag(0, 0); // Null incoming flag value.
+ SDValue InFlag(nullptr, 0); // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
@@ -863,7 +890,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
if (!Inv)
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
// Get the specified bit.
SDValue Tmp =
@@ -878,7 +905,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (N->getOpcode()) {
@@ -959,8 +986,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ISD::SETCC:
- return SelectSETCC(N);
+ case ISD::SETCC: {
+ SDNode *SN = SelectSETCC(N);
+ if (SN)
+ return SN;
+ break;
+ }
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
@@ -1056,7 +1087,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering.getPointerTy(),
+ PPCLowering->getPointerTy(),
MVT::Other, Ops);
} else {
unsigned Opcode;
@@ -1091,7 +1122,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering.getPointerTy(),
+ PPCLowering->getPointerTy(),
MVT::Other, Ops);
}
}
@@ -1106,7 +1137,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
SDValue Val = N->getOperand(0).getOperand(0);
SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is just a masked value where the input is not handled above, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
@@ -1115,20 +1146,34 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is a 64-bit zero-extension mask, emit rldicl.
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
MB = 64 - CountTrailingOnes_64(Imm64);
- SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
- return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+ SH = 0;
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
}
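The shift-folding comment above can be verified numerically: a logical right shift by n equals a rotate-left by 64-n masked by n leading zeros, and for n <= mb that mask is subsumed by the rldicl mask. A self-contained check, modeling rldicl(x, SH, MB) as a 64-bit rotate-left by SH followed by clearing the MB high-order bits; the helper names are illustrative, not LLVM's.

#include <cassert>
#include <cstdint>

static uint64_t rotl64(uint64_t X, unsigned SH) {
  return SH == 0 ? X : (X << SH) | (X >> (64 - SH));
}

// rldicl(x, SH, MB): rotate left doubleword, then clear the MB high bits.
static uint64_t rldicl(uint64_t X, unsigned SH, unsigned MB) {
  uint64_t Mask = (MB == 0) ? ~0ULL : (~0ULL >> MB);
  return rotl64(X, SH) & Mask;
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  for (unsigned N = 1; N < 64; ++N)
    for (unsigned MB = N; MB < 64; ++MB)
      // rldicl(rldicl(x, 64-n, n), 0, mb) == rldicl(x, 64-n, mb), n <= mb.
      assert(rldicl(rldicl(X, 64 - N, N), 0, MB) == rldicl(X, 64 - N, MB));
  return 0;
}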
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
- return NULL;
+ return nullptr;
}
// ISD::OR doesn't get all the bitfield insertion fun.
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
@@ -1161,7 +1206,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// Other cases are autogenerated.
@@ -1173,17 +1218,45 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// Other cases are autogenerated.
break;
}
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ case PPCISD::ANDIo_1_EQ_BIT:
+ case PPCISD::ANDIo_1_GT_BIT: {
+ if (!ANDIGlueBug)
+ break;
+
+ EVT InVT = N->getOperand(0).getValueType();
+ assert((InVT == MVT::i64 || InVT == MVT::i32) &&
+ "Invalid input type for ANDIo_1_EQ_BIT");
+
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
+ N->getOperand(0),
+ CurDAG->getTargetConstant(1, InVT)), 0);
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
+ PPC::sub_eq : PPC::sub_gt, MVT::i32);
+
+ return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
+ CR0Reg, SRIdxVal,
+ SDValue(AndI.getNode(), 1) /* glue */);
+ }
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
+ // If this is a select of i1 operands, we'll pattern match it.
+ if (PPCSubTarget->useCRBits() &&
+ N->getOperand(0).getValueType() == MVT::i1)
+ break;
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (!isPPC64)
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
@@ -1202,6 +1275,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+
+ if (N->getValueType(0) == MVT::i1) {
+ // An i1 select is: (c & t) | (!c & f).
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+
+ unsigned SRI;
+ switch (Idx) {
+ default: llvm_unreachable("Invalid CC index");
+ case 0: SRI = PPC::sub_lt; break;
+ case 1: SRI = PPC::sub_gt; break;
+ case 2: SRI = PPC::sub_eq; break;
+ case 3: SRI = PPC::sub_un; break;
+ }
+
+ SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
+
+ SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
+ CCBit, CCBit), 0);
+ SDValue C = Inv ? NotCCBit : CCBit,
+ NotC = Inv ? CCBit : NotCCBit;
+
+ SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ C, N->getOperand(2)), 0);
+ SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ NotC, N->getOperand(3)), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
+ }
+
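The CRAND/CRNOR/CROR sequence built above computes the standard Boolean expansion of a select, which can be checked exhaustively. A plain C++ sketch of the identity, not LLVM code:

#include <cassert>

int main() {
  for (int C = 0; C < 2; ++C)
    for (int T = 0; T < 2; ++T)
      for (int F = 0; F < 2; ++F) {
        bool Sel = C ? T : F;                 // select_cc on i1
        bool Expanded = (C & T) | (!C & F);   // CRAND, CRNOR+CRAND, CROR
        assert(Sel == Expanded);
      }
  return 0;
}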
unsigned BROpc = getPredicateForSetCC(CC);
unsigned SelectCCOp;
@@ -1218,16 +1321,60 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
getI32Imm(BROpc) };
- return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
}
+ case ISD::VSELECT:
+ if (PPCSubTarget->hasVSX()) {
+ SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
+ }
+
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
+ Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
+ unsigned DM[2];
+
+ for (int i = 0; i < 2; ++i)
+ if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
+ DM[i] = 0;
+ else
+ DM[i] = 1;
+
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
+
+ if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
+ Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<LoadSDNode>(Op1.getOperand(0))) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
+ SDValue Base, Offset;
+
+ if (LD->isUnindexed() &&
+ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
+ SDValue Chain = LD->getChain();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ }
+ }
+
+ SDValue Ops[] = { Op1, Op2, DMV };
+ return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
+ }
+
+ break;
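As a rough model of the XXPERMDI selection above: the two-bit DM immediate is packed as (DM[0] << 1) | DM[1], with DM[0] choosing the doubleword taken from the first source and DM[1] the doubleword taken from the second. The sketch below assumes those usual xxpermdi semantics and uses a two-element array in place of a v2i64 register; all names are invented.

#include <array>
#include <cassert>
#include <cstdint>

using V2 = std::array<uint64_t, 2>;

// Model: result element 0 comes from A, element 1 from B, each selected
// by one bit of the DM immediate.
static V2 xxpermdi(const V2 &A, const V2 &B, unsigned DM) {
  return { A[(DM >> 1) & 1], B[DM & 1] };
}

int main() {
  V2 A = {1, 2}, B = {3, 4};
  assert((xxpermdi(A, B, 0) == V2{1, 3}));
  assert((xxpermdi(A, B, 3) == V2{2, 4}));
  // Op1 == Op2 with DM == 0 is a splat of element 0 -- the case the code
  // above replaces with a single LXVDSX load when it feeds from memory.
  assert((xxpermdi(A, A, 0) == V2{1, 1}));
  return 0;
}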
case PPCISD::BDNZ:
case PPCISD::BDZ: {
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool IsPPC64 = PPCSubTarget->isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
(IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
- MVT::Other, Ops, 2);
+ MVT::Other, Ops);
}
case PPCISD::COND_BRANCH: {
// Op #0 is the Chain.
@@ -1240,14 +1387,36 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
- return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ unsigned PCC = getPredicateForSetCC(CC);
+
+ if (N->getOperand(2).getValueType() == MVT::i1) {
+ unsigned Opc;
+ bool Swap;
+ switch (PCC) {
+ default: llvm_unreachable("Unexpected Boolean-operand predicate");
+ case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
+ case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
+ case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
+ case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
+ case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
+ case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
+ }
+
+ SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
+ N->getOperand(Swap ? 3 : 2),
+ N->getOperand(Swap ? 2 : 3)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
+ BitComp, N->getOperand(4), N->getOperand(0));
+ }
+
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
- SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ SDValue Ops[] = { getI32Imm(PCC), CondCode,
N->getOperand(4), N->getOperand(0) };
- return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
case ISD::BRIND: {
// FIXME: Should custom lower this.
@@ -1260,7 +1429,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
- assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+ if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+ SDValue GA = N->getOperand(0);
+ return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ N->getOperand(1));
+ }
+ assert (PPCSubTarget->isPPC64() &&
+ "Only supported for 64-bit ABI and 32-bit SVR4");
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
@@ -1269,10 +1444,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
- // The first source operand is a TargetGlobalAddress or a
- // TargetJumpTable. If it is an externally defined symbol, a symbol
- // with common linkage, a function address, or a jump table address,
- // or if we are generating code for large code model, we generate:
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it is an externally defined symbol, a symbol with common linkage,
+ // a non-local function address, or a jump table address, or if we are
+ // generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1287,18 +1462,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue = GAlias ?
- GAlias->resolveAliasedGlobal(false) : GValue;
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- assert((GVar || isa<Function>(RealGValue)) &&
- "Unexpected global value subclass!");
-
- // An external variable is one without an initializer. For these,
- // for variables with common linkage, and for Functions, generate
- // the LDtocL form.
- if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage())
+ if ((GValue->getType()->getElementType()->isFunctionTy() &&
+ (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
@@ -1382,7 +1549,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
-/// PostProcessISelDAG - Perform some late peephole optimizations
+/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
@@ -1390,8 +1557,480 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
if (TM.getOptLevel() == CodeGenOpt::None)
return;
+ PeepholePPC64();
+ PeepholeCROps();
+}
+
+// Check if all users of this node will become isel where the second operand
+// is the constant zero. If this is so, and if we can negate the condition,
+// then we can flip the true and false operands. This will allow the zero to
+// be folded with the isel so that we don't need to materialize a register
+// containing zero.
+bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
+ // If we're not using isel, then this does not matter.
+ if (!PPCSubTarget->hasISEL())
+ return false;
+
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (!User->isMachineOpcode())
+ return false;
+ if (User->getMachineOpcode() != PPC::SELECT_I4 &&
+ User->getMachineOpcode() != PPC::SELECT_I8)
+ return false;
+
+ SDNode *Op2 = User->getOperand(2).getNode();
+ if (!Op2->isMachineOpcode())
+ return false;
+
+ if (Op2->getMachineOpcode() != PPC::LI &&
+ Op2->getMachineOpcode() != PPC::LI8)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
+ if (!C)
+ return false;
+
+ if (!C->isNullValue())
+ return false;
+ }
+
+ return true;
+}
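The flip performed for such users rests on the identity select(c, t, f) == select(!c, f, t): once the condition node is replaced by its negation, swapping the isel operands moves the constant zero into the position where it can be folded rather than materialized in a register. A quick standalone check of the identity:

#include <cassert>

static int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  for (int C = 0; C < 2; ++C)
    for (int T = -2; T <= 2; ++T)
      // Negating the condition and swapping operands preserves the select.
      assert(sel(C != 0, T, 0) == sel(C == 0, 0, T));
  return 0;
}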
+
+void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
+ SmallVector<SDNode *, 4> ToReplace;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
+ User->getMachineOpcode() == PPC::SELECT_I8) &&
+ "Must have all select users");
+ ToReplace.push_back(User);
+ }
+
+ for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
+ UE = ToReplace.end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ SDNode *ResNode =
+ CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
+ User->getValueType(0), User->getOperand(0),
+ User->getOperand(2),
+ User->getOperand(1));
+
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(User->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(User, ResNode);
+ }
+}
+
+void PPCDAGToDAGISel::PeepholeCROps() {
+ bool IsModified;
+ do {
+ IsModified = false;
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode || MachineNode->use_empty())
+ continue;
+ SDNode *ResNode = MachineNode;
+
+ bool Op1Set = false, Op1Unset = false,
+ Op1Not = false,
+ Op2Set = false, Op2Unset = false,
+ Op2Not = false;
+
+ unsigned Opcode = MachineNode->getMachineOpcode();
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ case PPC::CRNAND:
+ case PPC::CROR:
+ case PPC::CRXOR:
+ case PPC::CRNOR:
+ case PPC::CREQV:
+ case PPC::CRANDC:
+ case PPC::CRORC: {
+ SDValue Op = MachineNode->getOperand(1);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op2Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op2Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op2Not = true;
+ }
+ } // fallthrough
+ case PPC::BC:
+ case PPC::BCn:
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC: {
+ SDValue Op = MachineNode->getOperand(0);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op1Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op1Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op1Not = true;
+ }
+ }
+ break;
+ }
+
+ bool SelectSwap = false;
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x & x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set)
+ // 1 & y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // x & 1 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset || Op2Unset)
+ // x & 0 = 0 & y = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // ~x & y = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x & ~y = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // nand(x, x) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Set)
+ // nand(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // nand(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset || Op2Unset)
+ // nand(x, 0) = nand(0, y) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nand(x, ~y) = ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CROR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x | x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set || Op2Set)
+ // x | 1 = 1 | y = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // 0 | y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // x | 0 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRXOR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // xor(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // xor(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // xor(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset)
+ // xor(0, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // xor(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // xor(~x, y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // xor(x, ~y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNOR:
+ if (Op1Set || Op2Set)
+ // nor(1, y) -> 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // nor(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // nor(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // nor(~x, y) = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nor(x, ~y) = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CREQV:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // eqv(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // eqv(1, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // eqv(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // eqv(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // eqv(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // eqv(~x, y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // eqv(x, ~y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRANDC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // andc(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // andc(1, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Unset || Op2Set)
+ // andc(0, y) = andc(x, 1) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Unset)
+ // andc(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // andc(~x, y) = ~(x | y) = nor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // andc(x, ~y) = x & y
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::CRORC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // orc(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set || Op2Unset)
+ // orc(1, y) = orc(x, 0) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Set)
+ // orc(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // orc(0, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Not)
+ // orc(~x, y) = ~(x & y) = nand(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // orc(x, ~y) = x | y
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC:
+ if (Op1Set)
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op1Unset)
+ ResNode = MachineNode->getOperand(2).getNode();
+ else if (Op1Not)
+ ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
+ SDLoc(MachineNode),
+ MachineNode->getValueType(0),
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(2),
+ MachineNode->getOperand(1));
+ break;
+ case PPC::BC:
+ case PPC::BCn:
+ if (Op1Not)
+ ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
+ PPC::BC,
+ SDLoc(MachineNode),
+ MVT::Other,
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1),
+ MachineNode->getOperand(2));
+ // FIXME: Handle Op1Set, Op1Unset here too.
+ break;
+ }
+
+ // If we're inverting this node because it is used only by selects that
+ // we'd like to swap, then swap the selects before the node replacement.
+ if (SelectSwap)
+ SwapAllSelectUsers(MachineNode);
+
+ if (ResNode != MachineNode) {
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(MachineNode->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(MachineNode, ResNode);
+ IsModified = true;
+ }
+ }
+ if (IsModified)
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
+}
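Most of the rewrites in PeepholeCROps are two-variable Boolean identities, so they can be validated exhaustively. A standalone sketch follows; the helpers model the CR operations (crandc(a,b) = a & ~b, crorc(a,b) = a | ~b, creqv(a,b) = ~(a ^ b), and so on) and are not LLVM's APIs.

#include <cassert>

static bool crandc(bool A, bool B) { return A && !B; }
static bool crorc (bool A, bool B) { return A || !B; }
static bool creqv (bool A, bool B) { return A == B; }
static bool crnand(bool A, bool B) { return !(A && B); }
static bool crnor (bool A, bool B) { return !(A || B); }

int main() {
  for (int XI = 0; XI < 2; ++XI)
    for (int YI = 0; YI < 2; ++YI) {
      bool X = XI, Y = YI;
      assert((!X && Y) == crandc(Y, X));     // ~x & y      -> andc(y, x)
      assert(crnand(!X, Y) == crorc(X, Y));  // nand(~x, y) -> orc(x, y)
      assert(crnor(!X, Y) == crandc(X, Y));  // nor(~x, y)  -> andc(x, y)
      assert(((!X) != Y) == creqv(X, Y));    // xor(~x, y)  -> eqv(x, y)
      assert(crorc(!X, Y) == crnand(X, Y));  // orc(~x, y)  -> nand(x, y)
    }
  return 0;
}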
+
+void PPCDAGToDAGISel::PeepholePPC64() {
// These optimizations are currently supported only for 64-bit SVR4.
- if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
return;
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
@@ -1549,8 +2188,8 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
- PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
- false, false);
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
+ nullptr, false, false);
Registry.registerPass(*PI, true);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 25a7ca7f59a7..708d36f6f978 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18,6 +18,8 @@
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -46,20 +48,21 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
-static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
- if (TM.getSubtargetImpl()->isDarwin())
- return new TargetLoweringObjectFileMachO();
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
- if (TM.getSubtargetImpl()->isSVR4ABI())
- return new PPC64LinuxTargetObjectFile();
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a Linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return new TargetLoweringObjectFileMachO();
- return new TargetLoweringObjectFileELF();
+ return new PPC64LinuxTargetObjectFile();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -68,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- bool isPPC64 = Subtarget->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
@@ -94,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ if (Subtarget.useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
@@ -139,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- if (Subtarget->hasFCPSGN()) {
+ if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
@@ -157,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
- if (Subtarget->hasFPRND()) {
+ if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -179,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- if (Subtarget->hasPOPCNTD()) {
+ if (Subtarget.hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
@@ -191,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- // PowerPC does not have Select
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ if (!Subtarget.useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -256,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget->isSVR4ABI()) {
+ if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
@@ -276,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget->isSVR4ABI() && !isPPC64)
+ if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
@@ -309,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -319,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
@@ -327,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// With the instructions enabled under FPCVT, we can do everything.
- if (PPCSubTarget.hasFPCVT()) {
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
@@ -341,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
- if (Subtarget->use64BitRegs()) {
+ if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -357,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (Subtarget->hasAltivec()) {
+ if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -413,12 +453,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -445,7 +488,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -464,7 +508,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -484,16 +528,83 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, while overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
}
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
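The v2i64 promotion a few hunks above (setOperationAction Promote plus AddPromotedToType to v2f64) works because a 16-byte load or store is bit-preserving regardless of element type, so the integer vector access can be mapped onto the f64 form. A minimal standalone sketch of that soundness argument in plain C++ (not the LLVM API):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Src[2] = {0x0123456789abcdefULL, 0xfedcba9876543210ULL};
  double Tmp[2];                      // the "v2f64" view of the same 16 bytes
  std::memcpy(Tmp, Src, sizeof Tmp);  // the promoted (f64) load
  uint64_t Dst[2];
  std::memcpy(Dst, Tmp, sizeof Dst);  // the bitcast back to "v2i64"
  assert(Dst[0] == Src[0] && Dst[1] == Src[1]);
  return 0;
}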
@@ -507,6 +618,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ if (!isPPC64) {
+ // These libcalls are not available on 32-bit targets.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
@@ -522,9 +640,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget.useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget.useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
@@ -532,7 +662,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget->isDarwin()) {
+ if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -545,18 +675,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget.useCRBits())
+ setHasMultipleConditionRegisters();
+
setMinFunctionAlignment(2);
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget->isJITCodeModel())
+ if (isPPC64 && Subtarget.isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
- if (Subtarget->enableMachineScheduler())
+ if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
@@ -565,8 +700,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
@@ -610,20 +745,20 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
- if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
- getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
@@ -637,7 +772,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
@@ -670,6 +804,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
@@ -688,7 +823,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return MVT::i32;
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -717,15 +852,29 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
- if (!isUnary) {
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
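To make the ShuffleKind == 0 path concrete: vpkuhum keeps the low-order byte of each halfword, which in big-endian numbering is every odd byte of the 32-byte concatenation of the two inputs. A standalone sketch (undef elements reduced to a -1 wildcard; the helper name is made up for illustration):

#include <cstdio>

static bool isBEVPKUHUMMask(const int Mask[16]) {
  for (int i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != i * 2 + 1)  // -1 plays the role of undef
      return false;
  return true;
}

int main() {
  int Mask[16];
  for (int i = 0; i != 16; ++i) Mask[i] = i * 2 + 1;  // {1,3,5,...,31}
  std::printf("%d\n", isBEVPKUHUMMask(Mask));         // prints 1
  return 0;
}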
@@ -733,18 +882,33 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
- if (!isUnary) {
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
@@ -754,8 +918,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
@@ -771,29 +935,66 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ }
}
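The LHSStart/RHSStart pairs passed to isVMerge encode which half of each input a merge reads: for a big-endian vmrglb, the mask interleaves bytes 8..15 of the first input with bytes 24..31 (bytes 8..15 of the second input, in concatenated numbering), and the little-endian and unary cases just select different start pairs. A standalone sketch for UnitSize == 1, ignoring undef elements:

#include <cstdio>

static bool isVMergeByte(const int Mask[16], int LHSStart, int RHSStart) {
  for (int i = 0; i != 8; ++i)
    if (Mask[2 * i] != LHSStart + i || Mask[2 * i + 1] != RHSStart + i)
      return false;
  return true;
}

int main() {
  int Mask[16];
  for (int i = 0; i != 8; ++i) {
    Mask[2 * i]     = 8 + i;   // from the first input
    Mask[2 * i + 1] = 24 + i;  // from the second input
  }
  std::printf("%d\n", isVMergeByte(Mask, 8, 24));  // prints 1 (BE vmrglb)
  return 0;
}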
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -808,19 +1009,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
+
ShiftAmt -= i;
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
- if (!isUnary) {
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
- } else {
+ } else if (ShuffleKind == 1) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
- }
+ } else
+ return -1;
+
+ if (ShuffleKind == 2 && isLE)
+ ShiftAmt = 16 - ShiftAmt;
+
return ShiftAmt;
}
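To make the vsldoi arithmetic concrete: a two-input mask of 16 consecutive indices {S, S+1, ..., S+15} selects a left shift of the 32-byte concatenation by S bytes, and the ShuffleKind == 2 path mirrors the amount to 16 - S because the inputs arrive swapped on little-endian targets. A standalone sketch, ignoring undef elements and the unary wrap-around case:

#include <cstdio>

static int shiftAmount(const int Mask[16], bool SwappedLE) {
  int S = Mask[0];
  for (int i = 1; i != 16; ++i)
    if (Mask[i] != S + i)
      return -1;                 // not consecutive: not a vsldoi
  return SwappedLE ? 16 - S : S;
}

int main() {
  int Mask[16];
  for (int i = 0; i != 16; ++i) Mask[i] = 3 + i;    // {3,4,...,18}
  std::printf("%d %d\n", shiftAmount(Mask, false),  // 3  (big-endian)
                         shiftAmount(Mask, true));  // 13 (little-endian)
  return 0;
}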
@@ -872,10 +1080,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- return SVOp->getMaskElt(0) / EltSize;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
}
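The little-endian correction above mirrors the splat index because the VSPLT* immediates count elements from the most-significant end of the register. A standalone sketch of just that arithmetic:

#include <cstdio>

static unsigned vspltImmediate(unsigned MaskElt0, unsigned EltSize, bool LE) {
  unsigned Elt = MaskElt0 / EltSize;
  return LE ? (16 / EltSize) - 1 - Elt : Elt;
}

int main() {
  // Splat of 32-bit element 1 (mask bytes 4..7).
  std::printf("%u %u\n", vspltImmediate(4, 4, false),  // 1 on big-endian
                         vspltImmediate(4, 4, true));  // 2 on little-endian
  return 0;
}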
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@@ -883,7 +1095,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
- SDValue OpVal(0, 0);
+ SDValue OpVal(nullptr, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
@@ -902,7 +1114,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
- if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
@@ -917,21 +1129,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
- if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
}
// Finally, check the least significant entry.
if (LeadingZero) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16)
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
}
if (LeadingOnes) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
@@ -944,13 +1156,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
+ if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
- if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
@@ -999,7 +1211,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
+ if (!isa<ConstantSDNode>(N))
return false;
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
@@ -1038,12 +1250,12 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// disjoint.
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N.getOperand(1),
- RHSKnownZero, RHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnownZero | RHSKnownZero) == 0) {
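The rename aside, the logic is unchanged: when the known-zero masks prove the two operands can never have a set bit in the same position, OR and ADD produce identical results, so the (or x, y) can safely be treated as an add for register+register address selection. A standalone sketch of the disjointness test:

#include <cassert>
#include <cstdint>

static bool disjoint(uint64_t KnownZeroLHS, uint64_t KnownZeroRHS) {
  // Every bit position must be known zero on at least one side.
  return ~(KnownZeroLHS | KnownZeroRHS) == 0;
}

int main() {
  uint64_t A = 0xFFFF0000, B = 0x0000ABCD;  // disjoint set bits
  assert(disjoint(~A, ~B));                 // exact values => exact masks
  assert((A | B) == (A + B));               // no carries, so OR == ADD
  return 0;
}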
@@ -1143,12 +1355,18 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- Base = N.getOperand(0);
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
@@ -1161,7 +1379,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1212,7 +1430,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1303,14 +1521,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
- // Don't use the pic base if not in PIC relocation model. Or if we are on a
- // non-darwin platform. We don't support PIC on other platforms yet.
- bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<PPCSubtarget>().isDarwin();
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
if (isPIC) {
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
@@ -1358,7 +1576,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1366,6 +1584,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(CP);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
SDValue CPILo =
@@ -1379,7 +1606,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1387,6 +1614,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(GA);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
@@ -1416,7 +1652,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1431,18 +1667,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
- if (!is64bit)
- llvm_unreachable("only local-exec is currently supported for ppc32");
-
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, TPOffsetHi);
+ PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
@@ -1506,7 +1743,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1515,6 +1752,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ }
+
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
@@ -1534,6 +1779,27 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
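The equality special case above rests on a simple decomposition: a 64-bit lane compares equal exactly when both of its 32-bit halves do, so a v4i32 compare (which Altivec does have) carries all the information needed for v2i64 SETEQ/SETNE. A scalar standalone sketch of that decomposition:

#include <cassert>
#include <cstdint>

static bool eq64via32(uint64_t A, uint64_t B) {
  bool LoEq = (uint32_t)A == (uint32_t)B;
  bool HiEq = (uint32_t)(A >> 32) == (uint32_t)(B >> 32);
  return LoEq && HiEq;
}

int main() {
  assert(eq64via32(0x0123456789abcdefULL, 0x0123456789abcdefULL));
  assert(!eq64via32(0x0123456789abcdefULL, 0x0123456700000000ULL));
  return 0;
}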
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
@@ -1727,17 +1993,13 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -1858,7 +2120,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1885,7 +2147,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1909,8 +2171,8 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const uint16_t *GetFPR() {
- static const uint16_t FPR[] = {
+static const MCPhysReg *GetFPR() {
+ static const MCPhysReg FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1922,14 +2184,119 @@ static const uint16_t *GetFPR() {
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
- unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
- ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
+ return Align;
+}
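A standalone sketch of the alignment rules above, assuming a 64-bit ABI with PtrByteSize == 8 and leaving out the array-member (consecutive-registers) case for brevity:

#include <cstdio>

static unsigned slotAlign(unsigned PtrByteSize, bool IsVector,
                          unsigned ByValAlign /* 0 when not byval */) {
  unsigned Align = PtrByteSize;
  if (IsVector)                  // Altivec/VSX parameters pad to 16 bytes
    Align = 16;
  if (ByValAlign > PtrByteSize)  // oversized byval alignment is honored
    Align = ByValAlign;          // (assumed a multiple of PtrByteSize)
  return Align;
}

int main() {
  std::printf("%u %u %u\n",
              slotAlign(8, false, 0),    // i64       -> 8
              slotAlign(8, true, 0),     // v4i32     -> 16
              slotAlign(8, false, 32));  // byval(32) -> 32
  return 0;
}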
+
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+ // (this check catches arguments passed partially in memory)
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
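A short standalone driver for the bookkeeping above, showing why a function taking ten i64 arguments forces a parameter save area under ELFv2 (the 32-byte linkage size is an assumption matching the ELFv2 layout; the FPR/VR escape hatches do not apply to integer arguments):

#include <cstdio>

int main() {
  const unsigned PtrByteSize = 8, LinkageSize = 32;  // assumed ELFv2
  const unsigned ParamAreaSize = 8 * PtrByteSize;    // 8 GPR doublewords
  unsigned ArgOffset = LinkageSize;
  for (unsigned Arg = 1; Arg <= 10; ++Arg) {  // ten i64 arguments
    bool UseMemory = ArgOffset >= LinkageSize + ParamAreaSize;
    std::printf("arg %u: %s\n", Arg, UseMemory ? "stack" : "GPR");
    ArgOffset += PtrByteSize;  // each i64 takes one doubleword
  }
  return 0;  // args 1..8 land in GPRs, args 9..10 on the stack
}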
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
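EnsureStackAlignment uses the usual power-of-two rounding idiom; it is the bitwise twin of the divide-and-multiply form used in CalculateStackSlotSize above. A standalone sketch:

#include <cstdio>

static unsigned roundUp(unsigned N, unsigned A) {  // A must be a power of two
  unsigned Mask = A - 1;
  return (N + Mask) & ~Mask;
}

int main() {
  std::printf("%u %u %u\n", roundUp(0, 16), roundUp(1, 16), roundUp(48, 16));
  // prints: 0 16 48
  return 0;
}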
+
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1938,8 +2305,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
@@ -2005,7 +2372,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2020,6 +2388,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
@@ -2027,7 +2396,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = &PPC::F8RCRegClass;
+ if (Subtarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
@@ -2035,18 +2407,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
isImmutable);
@@ -2072,36 +2452,27 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
- FI->setMinReservedArea(MinReservedArea);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const uint16_t GPArgRegs[] = {
+ static const MCPhysReg GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const uint16_t FPArgRegs[] = {
+ static const MCPhysReg FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -2163,8 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -2182,33 +2552,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
-}
-
-// Set the size that is at least reserved in caller of this function. Tail
-// call optimized functions' reserved stack space needs to be aligned so that
-// taking the difference between two stack areas will result in an aligned
-// stack.
-void
-PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea,
- bool isPPC64) const {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
SDValue
@@ -2221,6 +2565,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2231,63 +2577,75 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs))
+ HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
+ unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
- if (isVarArg) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else
- nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
+ // Compute GPR index associated with argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -2309,21 +2667,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- CurArgOffset = ArgOffset;
- }
-
- // All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize)
- CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
- // The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save area anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize < 8) {
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2332,25 +2700,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
- // slot. The address of the parameter was already calculated
- // above (InVals.push_back(FIN)) to be the right-justified
- // offset within the slot. For this store, we need a new
- // frame index that points at the beginning of the slot.
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ // slot.
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg),
false, false, 0);
}
MemOps.push_back(Store);
- ++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
@@ -2358,44 +2720,48 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - j;
+ if (GPR_idx == Num_GPR_Regs)
break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
}
+ ArgOffset += ArgSize;
continue;
}
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
@@ -2405,63 +2771,76 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
- // Every 8 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- }
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs) {
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
needsLoad = true;
- ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
++VR_idx;
} else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
needsLoad = true;
}
+ ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
@@ -2470,11 +2849,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(ArgVal);
}
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2488,7 +2875,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
// result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2501,8 +2889,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
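+ // Worked example (illustrative): with the 48-byte ELFv1 linkage area and
+ // PtrByteSize == 8, named arguments ending at ArgOffset == 88 give
+ // GPR_idx = (88 - 48) / 8 == 5, so X3..X7 were consumed by named
+ // arguments and only X8..X10 need to be spilled here for va_start.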
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -2528,22 +2915,24 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -2554,7 +2943,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -2581,6 +2970,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch(ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::f32:
VecArgOffset += 4;
@@ -2704,11 +3094,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
++GPR_idx;
} else {
needsLoad = true;
@@ -2724,7 +3119,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2813,11 +3208,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(ArgVal);
}
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2851,80 +3256,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
-/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
-static unsigned
-CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
- bool isPPC64,
- bool isVarArg,
- unsigned CC,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- unsigned &nAltivecParamsAtEnd) {
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
- unsigned NumOps = Outs.size();
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-
- // Tail call needs the stack to be aligned.
- if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().
- getFrameLowering()->getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- }
-
- return NumBytes;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -2967,7 +3303,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Flags.isByVal()) return false;
}
- // Non PIC/GOT tail calls are supported.
+ // Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
@@ -2985,12 +3321,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
+ if (!C) return nullptr;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
- return 0; // Top 6 bits have to be sext of immediate.
+ return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
@@ -3096,7 +3432,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
@@ -3126,8 +3462,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(0),
- MachinePointerInfo(0));
+ false, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -3172,8 +3508,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains2[0], MemOpChains2.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
@@ -3190,10 +3525,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &PPCSubTarget) {
+ const PPCSubtarget &Subtarget) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3202,11 +3538,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
@@ -3214,15 +3551,18 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// far-call stubs may be outside relocation limits for a BL instruction.
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) {
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common,
@@ -3238,13 +3578,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
@@ -3257,7 +3599,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64) {
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -3287,8 +3629,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// Load the address of the function entry point from the function
// descriptor.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
- InFlag.getNode() ? 3 : 2);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
Chain = LoadFuncPtr.getValue(1);
InFlag = LoadFuncPtr.getValue(2);
@@ -3314,8 +3656,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- Callee, InFlag);
+ AddTOC, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
@@ -3324,8 +3668,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
MTCTROps[2] = InFlag;
}
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
- 2 + (InFlag.getNode() != 0));
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
NodeTys.clear();
@@ -3333,9 +3677,9 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = PPCISD::BCTRL;
- Callee.setNode(0);
+ Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -3357,6 +3701,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (Callee.getNode() && isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
return CallOpc;
}
@@ -3426,14 +3774,16 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget);
+ Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
@@ -3461,7 +3811,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
// Add a NOP immediately after the branch instruction when using the 64-bit
@@ -3474,7 +3824,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
- if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3494,12 +3844,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
}
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
InFlag = Chain.getValue(1);
}
@@ -3531,8 +3886,12 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
@@ -3585,7 +3944,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ PtrByteSize);
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -3611,7 +3971,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
errs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
} else {
@@ -3705,6 +4065,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -3729,8 +4092,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -3748,7 +4110,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
SDValue Ops[] = { Chain, InFlag };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
- dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+ dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
InFlag = Chain.getValue(1);
}
@@ -3792,6 +4154,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3808,16 +4172,44 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with at least 48 bytes, which
- // is reserved space for [SP][CR][LR][3 x unused].
- // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
- // of this call.
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
- Outs, OutVals, nAltivecParamsAtEnd);
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // of reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, it is
+ // 32 bytes of reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ // Respect the alignment of the argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
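+ // Worked example (illustrative): an ELFv2 call (32-byte linkage area)
+ // passing (i64, v4i32, float) accumulates:
+ //   i64:   align 8,  NumBytes 32 -> 40
+ //   v4i32: align 16, NumBytes rounds 40 -> 48, then + 16 = 64
+ //   float: align 8,  NumBytes 64 -> 72 (a lone float still occupies a
+ //          full doubleword slot; only float-array elements pack tighter).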
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is
+ // varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -3849,19 +4241,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -3873,6 +4270,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ // Respect the alignment of the argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ // Compute the GPR index associated with the argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
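+ // Illustrative: with the 32-byte ELFv2 linkage area, ArgOffset == 72
+ // yields GPR_idx = (72 - 32) / 8 == 5, i.e. X8; the std::min clamp makes
+ // GPR_idx == NumGPRs once the offset is past the eight GPR slots, which
+ // the code below reads as "no GPR available".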
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
@@ -3883,7 +4291,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Promote integers to 64-bit values.
- if (Arg.getValueType() == MVT::i32) {
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
@@ -3905,15 +4313,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
- "ByVal alignment is not a multiple of the pointer size");
-
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3922,7 +4321,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
ArgOffset += PtrByteSize;
continue;
@@ -3930,9 +4329,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (GPR_idx == NumGPRs && Size < 8) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -3967,8 +4369,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -3978,7 +4383,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4007,10 +4412,14 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
@@ -4019,40 +4428,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
break;
case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+ // then the parameter save area. For now, always put arguments to vararg
+ // routines in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
- // A single float or an aggregate containing only a single float
- // must be passed right-justified in the stack doubleword, and
- // in the GPR, if one is available.
- SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
- StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- } else
- StoreOff = PtrOff;
-
- SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
- MachinePointerInfo(), false, false, 0);
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false,
- false, 0);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- } else if (GPR_idx != NumGPRs)
- // If we have any FPRs remaining, we may also have GPRs remaining.
- ++GPR_idx;
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs) {
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+ // together with the subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4061,27 +4500,32 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
+ }
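+ // Illustrative example: passing float f[4] once the FPRs are exhausted.
+ // Elements 0/1 share one GPR and elements 2/3 the next; for each odd
+ // element the code above builds
+ //   BUILD_PAIR(bitcast f[i-1], bitcast f[i])  // halves swapped on BE
+ // so the pair occupies a single 64-bit register in memory order.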
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
+
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
if (isVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -4090,7 +4534,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -4106,43 +4556,49 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
break;
}
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
+ // Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
- ArgOffset += 16;
}
+ ArgOffset += 16;
break;
}
}
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
- // R12 must contain the address of an indirect callee. This does not
- // mean the MTCTR instruction must use R12; it's easier to model this
- // as an extra parameter, so do that.
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
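+ // The TOC save slot lives in the caller's linkage area: offset 40 under
+ // ELFv1 and 24 under ELFv2 (see PPCFrameLowering::getTOCSaveOffset).
+ // After the bctrl, the return path reloads r2 from this slot, as set up
+ // in FinishCall above.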
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4190,15 +4646,56 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// pre-reserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16-byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is
+ // varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4234,20 +4731,20 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -4255,7 +4752,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4338,9 +4835,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -4481,8 +4982,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
@@ -4570,8 +5070,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4609,8 +5108,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4633,8 +5132,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4674,7 +5173,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
- return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
@@ -4692,6 +5191,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
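+
+// Taken together, these three hooks give i1 a one-byte in-memory form
+// without native i1 loads/stores. A rough sketch of the resulting code
+// (illustrative; actual instruction selection may differ):
+//   load  i1:  lbz  rD, 0(rA)      // zero-extending byte load + truncate
+//   store i1:  zero-extend to GPR width, then stb rS, 0(rA)
+//   trunc i1:  andi. rD, rS, 1     // result taken from CR0's GT bit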
+
/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4805,12 +5353,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
@@ -4819,8 +5367,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
// Convert the FP value to an int value through memory.
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
@@ -4833,8 +5381,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
- MVT::i32, MMO);
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
MPI, false, false, 0);
@@ -4858,17 +5405,22 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
- assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, Op.getValueType()),
+ DAG.getConstantFP(0.0, Op.getValueType()));
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
@@ -4884,7 +5436,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
- !PPCSubTarget.hasFPCVT() &&
+ !Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4922,7 +5474,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
@@ -4939,7 +5491,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
- if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -4956,9 +5508,9 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, 2, MVT::i32, MMO);
+ Ops, MVT::i32, MMO);
} else {
- assert(PPCSubTarget.isPPC64() &&
+ assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
@@ -4980,7 +5532,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5010,14 +5562,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue MFFSreg, InFlag;
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
@@ -5076,7 +5627,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5105,7 +5656,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5134,7 +5685,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
//===----------------------------------------------------------------------===//
@@ -5163,8 +5714,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
- &Ops[0], Ops.size());
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
@@ -5223,7 +5773,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -5271,10 +5821,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
- EVT VT = Op.getValueType();
- int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
- SDValue EltSize = DAG.getConstant(Size, MVT::i32);
- return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ EVT VT = (SplatSize == 1 ? MVT::v16i8 :
+ (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
+ SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ if (VT == Op.getValueType())
+ return RetVal;
+ else
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
}
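+ // Illustrative: SextVal == 20 with SplatSize == 4 can later be expanded
+ // as vspltisw 10 followed by vadduwm (10 + 10 == 20); emitting the
+ // VADD_SPLAT pseudo here defers that choice until after DAG combining.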
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5293,6 +5847,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
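The cited example is easy to reproduce on any host. The standalone sketch below (plain C++, hypothetical values, not part of the patch) stores the 32-bit splat that {0, -1, 0, -1, ...} corresponds to in big-endian element order and reads it back as 16-bit lanes; on a little-endian host the lanes come back swapped, which is exactly the hazard the guard above sidesteps.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // In big-endian element order, the v8i16 pattern {0, -1, 0, -1, ...}
  // packs into the 32-bit splat value 0x0000FFFF.
  uint32_t Splat[4] = {0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu};
  uint16_t Lanes[8];
  std::memcpy(Lanes, Splat, sizeof(Lanes));
  // A big-endian host prints 0 ffff 0 ffff ...; a little-endian host prints
  // ffff 0 ffff 0 ..., i.e. {-1, 0, ...}, matching the comment above.
  for (uint16_t L : Lanes)
    std::printf("%x ", L);
  std::printf("\n");
}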
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -5461,6 +6031,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -5469,15 +6040,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true) ||
- PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
return Op;
}
}
@@ -5485,15 +6056,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false) ||
- PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -5527,7 +6099,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
- if (isFourElementShuffle) {
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
@@ -5556,6 +6130,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
@@ -5564,13 +6143,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i32));
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+ ResultMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
}
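Why the 31-complement plus operand swap is correct is easiest to see in a byte-level model. The following standalone sketch (plain host C++ with made-up test values, not part of the patch) models a little-endian register, where hardware byte i in vperm's big-endian numbering is memory byte 15 - i, and checks that the lowering above reproduces the requested shuffle:

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

using V16 = std::array<uint8_t, 16>;

// Reference vperm with the ISA's big-endian byte numbering, viewed from a
// little-endian host: hardware byte i of a register is memory byte 15 - i.
static V16 vperm_le_host(const V16 &A, const V16 &B, const V16 &M) {
  auto hw = [](const V16 &R, unsigned i) { return R[15 - i]; };
  V16 Out{};
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Sel = hw(M, i) & 31;
    uint8_t Byte = Sel < 16 ? hw(A, Sel) : hw(B, Sel - 16);
    Out[15 - i] = Byte; // hardware byte i lives at memory byte 15 - i
  }
  return Out;
}

int main() {
  V16 V1, V2, Mask;
  for (unsigned i = 0; i != 16; ++i) { V1[i] = i; V2[i] = 0x10 + i; }
  // Desired shuffle in element (memory) order: result byte i takes byte
  // Src[i] of concat(V1, V2).
  unsigned Src[16] = {1, 17, 3, 19, 5, 21, 7, 23,
                      9, 25, 11, 27, 13, 29, 15, 31};
  // Little-endian lowering from LowerVECTOR_SHUFFLE: complement each index
  // with respect to 31 and swap the two source operands.
  for (unsigned i = 0; i != 16; ++i)
    Mask[i] = uint8_t(31 - Src[i]);
  V16 Out = vperm_le_host(V2, V1, Mask);
  for (unsigned i = 0; i != 16; ++i)
    assert(Out[i] == (Src[i] < 16 ? V1[Src[i]] : V2[Src[i] - 16]));
  std::printf("little-endian vperm lowering verified\n");
}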
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
@@ -5644,7 +6232,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
@@ -5685,6 +6273,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Flags;
}
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
+  // instructions), but for smaller types, we need to first extend up to v2i32
+  // before going any farther.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -5739,6 +6351,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
@@ -5750,13 +6363,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
- // Merge the results together.
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = 2*i+1;
- Ops[i*2+1] = 2*i+1+16;
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
}
- return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
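A scalar model of the even/odd reversal, as a standalone sketch with made-up inputs (not part of the patch): vmuleub and vmuloub number elements big-endian, so viewed from a little-endian element array their roles flip, and the swapped shuffle above reassembles the full byte-wise product.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

using V16 = std::array<uint8_t, 16>;
using V8 = std::array<uint16_t, 8>;

// "Even" products, seen from a little-endian element array, come from the
// odd-indexed bytes, and vice versa.
static V8 vmuleub_le(const V16 &A, const V16 &B) {
  V8 R{};
  for (unsigned H = 0; H != 8; ++H)
    R[H] = uint16_t(A[2 * H + 1]) * uint16_t(B[2 * H + 1]);
  return R;
}
static V8 vmuloub_le(const V16 &A, const V16 &B) {
  V8 R{};
  for (unsigned H = 0; H != 8; ++H)
    R[H] = uint16_t(A[2 * H]) * uint16_t(B[2 * H]);
  return R;
}

int main() {
  V16 A, B;
  for (unsigned i = 0; i != 16; ++i) { A[i] = 3 * i + 1; B[i] = 7 * i + 2; }
  V8 Even = vmuleub_le(A, B), Odd = vmuloub_le(A, B);

  // Bitcast each halfword vector to bytes (LE: low byte first), then apply
  // the little-endian shuffle from LowerMUL: {0, 16, 2, 18, ...} over
  // concat(OddParts, EvenParts).
  V16 OddB, EvenB, Out;
  for (unsigned H = 0; H != 8; ++H) {
    OddB[2 * H] = Odd[H] & 0xFF;   OddB[2 * H + 1] = Odd[H] >> 8;
    EvenB[2 * H] = Even[H] & 0xFF; EvenB[2 * H + 1] = Even[H] >> 8;
  }
  for (unsigned i = 0; i != 8; ++i) {
    Out[2 * i] = OddB[2 * i];      // shuffle index 2*i
    Out[2 * i + 1] = EvenB[2 * i]; // shuffle index 2*i + 16
  }
  for (unsigned i = 0; i != 16; ++i)
    assert(Out[i] == uint8_t(A[i] * B[i]));
  std::printf("v16i8 multiply via vmuleub/vmuloub (LE) verified\n");
}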
@@ -5776,21 +6400,24 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, PPCSubTarget);
+ return LowerVASTART(Op, DAG, Subtarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, PPCSubTarget);
+ return LowerVAARG(Op, DAG, Subtarget);
case ISD::VACOPY:
- return LowerVACOPY(Op, DAG, PPCSubTarget);
+ return LowerVACOPY(Op, DAG, Subtarget);
- case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
@@ -5809,6 +6436,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
@@ -5852,7 +6480,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
- SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
@@ -5914,8 +6542,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -5964,7 +6591,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -5983,8 +6610,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6136,7 +6762,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
@@ -6160,7 +6786,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
- if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -6173,12 +6799,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
- BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
- BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
@@ -6202,10 +6828,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
- if (PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
@@ -6255,7 +6881,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
+ (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+ PPC::R29 : PPC::R30);
MachineInstrBuilder MIB;
@@ -6317,7 +6946,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
- if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -6355,10 +6984,16 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
- Cond.push_back(MI->getOperand(4));
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
@@ -6370,9 +7005,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -6386,23 +7024,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
// copy0MBB:
// %FalseValue = ...
@@ -6458,13 +7104,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
- BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
- BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
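This hunk is a correctness fix, not a cleanup: atomic NAND must compute ~(old & val), while the previously used andc computes old & ~val. A one-pair check (standalone host C++, arbitrary values) shows the two functions differ:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Old = 0xC, Val = 0xA;
  uint32_t Nand = ~(Old & Val); // what ATOMIC_LOAD_NAND requires
  uint32_t Andc = Old & ~Val;   // what was emitted before this change
  std::printf("nand=%#x andc=%#x\n", Nand, Andc);
  assert(Nand != Andc);
}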
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
@@ -6504,8 +7150,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -6556,7 +7201,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
@@ -6575,8 +7220,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6725,6 +7369,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
+
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6744,9 +7409,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -6759,7 +7425,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
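For readers unfamiliar with the scheme, a scalar sketch of the iteration (standalone host C++; the made-up 2^-5-accurate starting estimate stands in for fre/fres): with F(X) = 1/X - A, the Newton update is X <- X * (2 - A * X), and the relative error squares on each step, which is why one extra iteration suffices for f64.

#include <cmath>
#include <cstdio>

int main() {
  double A = 3.14159;
  double X = (1.0 / A) * (1.0 + 0.03); // estimate with ~2^-5 relative error
  for (int i = 0; i != 4; ++i) {
    X = X * (2.0 - A * X); // Newton step for F(X) = 1/X - A
    std::printf("iter %d: rel. error = %g\n", i + 1,
                std::fabs(X - 1.0 / A) * A);
  }
}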
@@ -6806,9 +7472,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -6821,7 +7488,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
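The same idea for 1/sqrt(A), again as a standalone sketch: with F(X) = 1/X^2 - A the update is X <- X * (1.5 - 0.5 * A * X * X), and convergence is likewise quadratic, so the iteration counts mirror the reciprocal case above.

#include <cmath>
#include <cstdio>

int main() {
  double A = 2.0;
  double X = (1.0 / std::sqrt(A)) * (1.0 - 0.02); // ~2^-5-accurate estimate
  for (int i = 0; i != 4; ++i) {
    X = X * (1.5 - 0.5 * A * X * X); // Newton step for F(X) = 1/X^2 - A
    std::printf("iter %d: rel. error = %g\n", i + 1,
                std::fabs(X * std::sqrt(A) - 1.0));
  }
}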
@@ -6899,8 +7566,8 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const GlobalValue *GV1 = NULL;
- const GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -6938,10 +7605,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
- for (SDNode::op_iterator O = ChainNext->op_begin(),
- OE = ChainNext->op_end(); O != OE; ++O)
- if (!Visited.count(O->getNode()))
- Queue.push_back(O->getNode());
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
@@ -6979,6 +7645,534 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(Subtarget.useCRBits() &&
+ "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+    // high bits (all except for the first) don't affect the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+      // This is neither a signed nor an unsigned comparison; just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+ // We now know that the higher-order bits are irrelevant, we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
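The legality of keeping the whole cluster in i1 rests on a simple identity: for i1 inputs, truncating a bitwise operation on extensions equals the bitwise operation on the original bits. A standalone exhaustive check over both i1 values (host C++, not part of the patch):

#include <cassert>
#include <cstdio>

int main() {
  for (unsigned a = 0; a != 2; ++a)
    for (unsigned b = 0; b != 2; ++b) {
      unsigned za = a, zb = b; // zext i1 -> i32
      assert(((za & zb) & 1) == (a & b)); // trunc(and(zext, zext)) == and
      assert(((za | zb) & 1) == (a | b));
      assert(((za ^ zb) & 1) == (a ^ b));
    }
  std::printf("i1 trunc/zext bit-op identity holds\n");
}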
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+ Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 &&
+ Subtarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If all of the inputs are not already sign/zero extended, then
+ // we'll still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+ N->getOperand(0), ShiftCst), ShiftCst);
+}
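The two fallback extensions at the end of this combine have direct scalar analogues: zero-extend by masking the low PromBits, sign-extend by a shift-left/arithmetic-shift-right pair, exactly as the AND and SHL/SRA nodes above emit. A standalone sketch (PromBits = 32 chosen to match the 32-to-64-bit path; arithmetic right shift of a negative value is assumed, as on all mainstream targets):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned PromBits = 32;
  uint64_t X = 0xFFFFFFFF80000001ull; // only the low 32 bits are meaningful
  uint64_t ZExt = X & ((1ull << PromBits) - 1);                    // AND mask
  int64_t SExt = (int64_t)(X << (64 - PromBits)) >> (64 - PromBits); // SHL/SRA
  assert(ZExt == 0x80000001ull);
  assert(SExt == (int64_t)(int32_t)0x80000001u);
  std::printf("zext=%#llx sext=%#llx\n", (unsigned long long)ZExt,
              (unsigned long long)SExt);
}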
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -7005,6 +8199,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
@@ -7012,7 +8214,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7022,7 +8224,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
@@ -7035,7 +8237,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
@@ -7047,7 +8249,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7062,12 +8264,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
+ if (RV.getNode()) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
EVT VT = RV.getValueType();
@@ -7143,7 +8345,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
@@ -7170,8 +8372,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops),
- cast<StoreSDNode>(N)->getMemoryVT(),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
break;
@@ -7188,6 +8389,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
@@ -7195,25 +8397,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
- // loads into a alignment-based permutation-control instruction (lvsl),
- // a series of regular vector loads (which always truncate their
- // input address to an aligned address), and a series of permutations.
- // The results of these permutations are the requested loaded values.
- // The trick is that the last "extra" load is not taken from the address
- // you might suspect (sizeof(vector) bytes after the last requested
- // load), but rather sizeof(vector) - 1 bytes after the last
- // requested vector. The point of this is to avoid a page fault if the
- // base address happend to be aligned. This works because if the base
- // address is aligned, then adding less than a full vector length will
- // cause the last vector in the sequence to be (re)loaded. Otherwise,
- // the next vector will be fetched as you might suspect was necessary.
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
- DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
// Refine the alignment of the original load (a "new" load created here
// which was identical to the first except for the alignment would be
@@ -7262,8 +8467,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
- SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
- BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
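The page-fault-avoidance arithmetic is easy to check in isolation, since lvx masks off the low four address bits. A standalone sketch with hypothetical addresses: fetching at Last + 15 re-reads the final aligned line when the base is already aligned (never touching the next page), yet still reaches the following line when the base is misaligned.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  auto trunc16 = [](uintptr_t P) { return P & ~uintptr_t(15); }; // lvx addressing
  uintptr_t Aligned = 0x1000, Misaligned = 0x1004;
  assert(trunc16(Aligned + 15) == trunc16(Aligned));            // same line
  assert(trunc16(Misaligned + 15) == trunc16(Misaligned) + 16); // next line
  std::printf("lvx extra-load addressing verified\n");
}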
@@ -7288,24 +8503,26 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
++UI;
SmallVector<SDValue, 8> Ops;
- for (SDNode::op_iterator O = User->op_begin(),
- OE = User->op_end(); O != OE; ++O) {
- if (*O == Use)
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
Ops.push_back(To);
else
- Ops.push_back(*O);
+ Ops.push_back(O);
}
- DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+ DAG.UpdateNodeOperands(User, Ops);
}
return SDValue(N, 0);
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
@@ -7317,8 +8534,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl) {
- // We've found another LVSL, and this address if an aligned
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -7327,6 +8544,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+ }
break;
case ISD::BSWAP:
@@ -7349,7 +8567,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
- Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -7379,7 +8597,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = 0;
+ SDNode *VCMPoNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
@@ -7400,9 +8618,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
- SDNode *FlagUser = 0;
+ SDNode *FlagUser = nullptr;
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
- FlagUser == 0; ++UI) {
+ FlagUser == nullptr; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
@@ -7420,6 +8638,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
// that we don't have to do a MFOCRF: instead, branch directly on CR6. This
@@ -7488,7 +8725,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
@@ -7524,11 +8761,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Inline Assembly Support
//===----------------------------------------------------------------------===//
-void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
@@ -7584,6 +8821,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
// suboptimal.
return C_Memory;
}
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
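For context, here is how the new constraints might be spelled from user code. This is a hypothetical sketch, not taken from this patch, assuming GCC or Clang targeting a VSX-capable (POWER7 or later) processor with -maltivec -mvsx; "ws" would similarly hold a scalar double and "wc" an individual CR bit.

#include <altivec.h>

// "wa" asks for any VSX register; the %x modifier prints the full
// 64-register VSX number. xvadddp is the VSX double-precision vector add.
vector double vsx_add(vector double a, vector double b) {
  vector double r;
  asm("xvadddp %x0, %x1, %x2" : "=wa"(r) : "wa"(a), "wa"(b));
  return r;
}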
@@ -7598,10 +8840,21 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
+
// Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7639,11 +8892,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
@@ -7657,6 +8910,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
+ } else if (Constraint == "wc") { // an individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
@@ -7668,7 +8928,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
- if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
@@ -7686,7 +8946,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0,0);
+ SDValue Result;
// Only support length 1 constraints.
if (Constraint.length() > 1) return;
@@ -7792,6 +9052,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7799,8 +9062,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -7850,6 +9113,30 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+
+ if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
+ (!isPPC64 && VT != MVT::i32))
+ report_fatal_error("Invalid register global variable type");
+
+ bool is64Bit = isPPC64 && VT == MVT::i64;
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
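// Illustrative only: this hook serves the read_register/write_register
// intrinsics, so a GNU-style global register variable such as
//   register void *sp __asm__("r1");
// can be lowered by a front end to something like
//   %sp = call i64 @llvm.read_register.i64(metadata !{!"r1"})
// with "r1" resolved here to X1 (64-bit) or R1 (32-bit).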
+
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -7872,14 +9159,51 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (this->PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
return MVT::i64;
} else {
return MVT::i32;
}
}
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
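// Rationale: any integer of at most 64 bits can be rematerialized in a few
// instructions instead of being loaded, e.g. (one assumed sequence)
//   lis 3, 0x1234       # r3 = 0x12340000
//   ori 3, 3, 0x5678    # r3 = 0x12345678
// so folding the constant is generally cheaper than a constant-pool load.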
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
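// Both ranges mirror the 16-bit D-form immediates: cmpdi takes a signed
// 16-bit value and cmpldi an unsigned one (see their definitions below),
// so e.g. comparing a register against 40000 (a uint16) or adding 12 (an
// int16, via addi) requires no materialization of the constant.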
+
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -7893,8 +9217,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.getSimpleVT().isVector())
- return false;
+ if (VT.getSimpleVT().isVector()) {
+ if (Subtarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+ } else {
+ return false;
+ }
+ }
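// Presumably safe because the VSX load/store forms used for these types
// (e.g. lxvd2x/stxvd2x) accept arbitrary byte alignment, unlike Altivec's
// lvx/stvx, which silently truncate the effective address to a multiple
// of 16.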
if (VT == MVT::ppcf128)
return false;
@@ -7922,8 +9252,17 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT, unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index df3af35761ee..c9394dd12e7b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -18,9 +18,8 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
-#include "PPCSubtarget.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -71,19 +70,14 @@ namespace llvm {
TOC_ENTRY,
- /// The following three target-specific nodes are used for calls through
+ /// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Restore the TOC from the TOC save area of the current stack frame.
- /// This is basically a hard coded load instruction which additionally
- /// takes/produces a flag.
- TOC_RESTORE,
-
/// Like a regular LOAD but additionally taking/producing a flag.
LOAD,
- /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
- /// a hard coded load instruction.
+ /// Like LOAD (taking/producing a flag), but using r2 as the hard-coded
+ /// destination.
LOAD_TOC,
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
@@ -121,6 +115,12 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
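// Worked example of the bit semantics: after "andi. x, 1" the result is
// zero-extended and therefore never negative, so CR0's gt bit is set
// exactly when the low bit of x is 1 and the eq bit exactly when it is 0;
// reading either bit recovers the truncated i1 value (or its inverse).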
+
// EH_SJLJ_SETJMP - SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
@@ -177,6 +177,10 @@ namespace llvm {
CR6SET,
CR6UNSET,
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// on PPC32.
+ PPC32_GOT,
+
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
@@ -293,25 +297,28 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGL* instruction with the specified unit size (1, 2, or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGH* instruction with the specified unit size (1, 2, or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ unsigned ShuffleKind, SelectionDAG &DAG);
- /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
- /// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+ /// shift amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -324,7 +331,7 @@ namespace llvm {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -333,28 +340,29 @@ namespace llvm {
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
}
+ class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget &Subtarget;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
- virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
@@ -374,29 +382,31 @@ namespace llvm {
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
- Sched::Preference getSchedulingPreference(SDNode *N) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
- virtual MachineBasicBlock *
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
unsigned BinOpcode) const;
@@ -410,34 +420,58 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
+ AsmOperandInfo &info, const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty) const;
+ unsigned getByValTypeAlignment(Type *Ty) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
- virtual void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalAddImmediate(int64_t Imm) const override;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. E.g., on PPC it's free to truncate an i64 value in
+ /// register X1 to i32 by referencing its sub-register R1.
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -450,25 +484,46 @@ namespace llvm {
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- virtual EVT
+ EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const;
+ MachineFunction &MF) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+ bool allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
- virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+
+ // Should we expand the build vector with shuffles?
+ bool
+ shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
- virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
- const TargetLibraryInfo *LibInfo) const;
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
+
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ return Ty->isArrayTy();
+ }
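// For instance (illustrative C, assumed clang lowering): a homogeneous
// float aggregate such as
//   struct Quad { double v[4]; };
// reaches this hook as a [4 x double] array argument and is then passed
// in four consecutive FPRs under ELFv2 instead of through "byval" memory.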
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
@@ -509,6 +564,9 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -520,6 +578,7 @@ namespace llvm {
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -538,39 +597,34 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
+ SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual bool
+ bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
+ LLVMContext &Context) const override;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const override;
SDValue
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
SDValue ArgVal, SDLoc dl) const;
- void
- setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea, bool isPPC64) const;
-
SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -625,6 +679,8 @@ namespace llvm {
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 46db4fe91308..9ed384f56244 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -19,11 +19,13 @@ def s16imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
}
def s17imm64 : Operand<i64> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -32,14 +34,11 @@ def s17imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
}
-def PPCTLSRegOperand : AsmOperandClass {
- let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
- let RenderMethod = "addTLSRegOperands";
-}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
let ParserMatchClass = PPCTLSRegOperand;
@@ -80,15 +79,22 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+ def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>,
+ def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -107,9 +113,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
- "bdzlr", BrB, []>;
+ "bdzlr", IIC_BrB, []>;
def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
- "bdnzlr", BrB, []>;
+ "bdnzlr", IIC_BrB, []>;
}
}
@@ -119,41 +125,58 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
- "bl $func", BrB, []>;
+ "bl $func", IIC_BrB, []>;
def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>;
}
let Uses = [RM], isCodeGenOnly = 1 in {
def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
- "bl $func\n\tnop", BrB, []>;
+ "bl $func\n\tnop", IIC_BrB, []>;
def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins tlscall:$func),
- "bl $func\n\tnop", BrB, []>;
+ "bl $func\n\tnop", IIC_BrB, []>;
def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins abscalltarget:$func),
- "bla $func\n\tnop", BrB,
+ "bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
}
let Uses = [CTR8, RM] in {
def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", BrB, [(PPCbctrl)]>,
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>,
- Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
}
}
} // Interpretation64Bit
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
+def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
// Calls
def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
(BL8 tglobaladdr:$dst)>;
@@ -199,16 +222,16 @@ let usesCustomInserter = 1 in {
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", LdStLDARX,
+ "ldarx $rD, $ptr", IIC_LdStLDARX,
[(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", LdStSTDCX,
+ "stdcx. $rS, $dst", IIC_LdStSTDCX,
[(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi8 :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -225,28 +248,23 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
-let isCodeGenOnly = 1 in {
-
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
-def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
Requires<[In64BitMode]>;
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[]>;
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", BrB,
+ "ba $dst", IIC_BrB,
[]>;
-
-}
} // Interpretation64Bit
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
@@ -260,23 +278,23 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in {
def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
- "mtocrf $FXM, $ST", BrMCRX>,
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
- "mtcrf $FXM, $rS", BrMCRX>,
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
- "mfcr $rT", SprMFCR>,
+ "mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // neverHasSideEffects = 1
@@ -298,24 +316,24 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
let Uses = [CTR8] in {
def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
- "mfctr $rT", SprMFSPR>,
+ "mfctr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in {
+let hasSideEffects = 1, Defs = [CTR8] in {
let Pattern = [(int_ppc_mtctr i64:$rS)] in
def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
- "mfspr $rT, 268", SprMFTB>,
+ "mfspr $rT, 268", IIC_SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
// Note that encoding mftb using mfspr is now the preferred form,
// and has been since at least ISA v2.03. The mftb instruction has
@@ -329,12 +347,12 @@ def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#D
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
- "mtlr $rS", SprMTSPR>,
+ "mtlr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR8] in {
def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
- "mflr $rT", SprMFSPR>,
+ "mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
} // Interpretation64Bit
@@ -346,213 +364,236 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
let PPC970_Unit = 1 in { // FXU Operations.
let Interpretation64Bit = 1 in {
let neverHasSideEffects = 1 in {
+let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
- "li $rD, $imm", IntSimple,
+ "li $rD, $imm", IIC_IntSimple,
[(set i64:$rD, imm64SExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
- "lis $rD, $imm", IntSimple,
+ "lis $rD, $imm", IIC_IntSimple,
[(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
+let isCommutable = 1 in {
defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nand", "$rA, $rS, $rB", IntSimple,
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "and", "$rA, $rS, $rB", IntSimple,
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+} // isCommutable
defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "andc", "$rA, $rS, $rB", IntSimple,
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "or", "$rA, $rS, $rB", IntSimple,
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (or i64:$rS, i64:$rB))]>;
defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nor", "$rA, $rS, $rB", IntSimple,
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+} // isCommutable
defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "orc", "$rA, $rS, $rB", IntSimple,
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "eqv", "$rA, $rS, $rB", IntSimple,
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "xor", "$rA, $rS, $rB", IntSimple,
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+} // let isCommutable = 1
// Logical ops with immediate.
let Defs = [CR0] in {
-def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "andi. $dst, $src1, $src2", IntGeneral,
+def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
-def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "andis. $dst, $src1, $src2", IntGeneral,
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
}
-def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntSimple,
+def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
-def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntSimple,
+def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntSimple,
+def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
-def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntSimple,
+def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+let isCommutable = 1 in
defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "add", "$rT, $rA, $rB", IntSimple,
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB", IntSimple,
+ "add $rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "addc", "$rT, $rA, $rB", IntGeneral,
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
+
let Defs = [CARRY] in
def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "addic $rD, $rA, $imm", IntGeneral,
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
[(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
+ "addi $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
- "addis $rD, $rA, $imm", IntSimple,
+ "addis $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "subfic $rD, $rA, $imm", IntGeneral,
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
[(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfc", "$rT, $rA, $rB", IntGeneral,
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subf", "$rT, $rA, $rB", IntGeneral,
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "neg", "$rT, $rA", IntSimple,
+ "neg", "$rT, $rA", IIC_IntSimple,
[(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY] in {
+let isCommutable = 1 in
defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "adde", "$rT, $rA, $rB", IntGeneral,
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addme", "$rT, $rA", IntGeneral,
+ "addme", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, -1))]>;
defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addze", "$rT, $rA", IntGeneral,
+ "addze", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (adde i64:$rA, 0))]>;
defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfe", "$rT, $rA, $rB", IntGeneral,
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfme", "$rT, $rA", IntGeneral,
+ "subfme", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (sube -1, i64:$rA))]>;
defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfze", "$rT, $rA", IntGeneral,
+ "subfze", "$rT, $rA", IIC_IntGeneral,
[(set i64:$rT, (sube 0, i64:$rA))]>;
}
+} // isCodeGenOnly
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let isAsmParserOnly = 1 in
+def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple, []>;
+let isCommutable = 1 in {
defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhd", "$rT, $rA, $rB", IntMulHW,
+ "mulhd", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhdu", "$rT, $rA, $rB", IntMulHWU,
+ "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+} // isCommutable
}
} // Interpretation64Bit
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+ "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
- def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm),
- "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
- def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2),
- "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+ "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm),
+ "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "cmpldi $dst, $src1, $src2",
+ IIC_IntCompare>, isPPC64;
}
let neverHasSideEffects = 1 in {
defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "sld", "$rA, $rS, $rB", IntRotateD,
+ "sld", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srd", "$rA, $rS, $rB", IntRotateD,
+ "srd", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srad", "$rA, $rS, $rB", IntRotateD,
+ "srad", "$rA, $rS, $rB", IIC_IntRotateD,
[(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsb", "$rA, $rS", IntSimple,
+ "extsb", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsh", "$rA, $rS", IntSimple,
+ "extsh", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
// For fast-isel:
let isCodeGenOnly = 1 in {
def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
- "extsb $rA, $rS", IntSimple, []>, isPPC64;
+ "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64;
def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
- "extsh $rA, $rS", IntSimple, []>, isPPC64;
+ "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64;
} // isCodeGenOnly for fast-isel
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsw", "$rA, $rS", IntSimple,
+ "extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
- "extsw", "$rA, $rS", IntSimple,
+ "extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext i32:$rS))]>, isPPC64;
defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
- "sradi", "$rA, $rS, $SH", IntRotateDI,
+ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
- "cntlzd", "$rA, $rS", IntGeneral,
+ "cntlzd", "$rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd $rA, $rS", IntGeneral,
+ "popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32 bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw $rA, $rS", IntGeneral,
+ "popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IntDivD,
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IntDivD,
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
[(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulld", "$rT, $rA, $rB", IntMulHD,
+ "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
[(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "mulli $rD, $rA, $imm", IntMulLI,
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
[(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
}
@@ -560,7 +601,7 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
(ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64, RegConstraint<"$rSi = $rA">,
NoEncode<"$rSi">;
}
@@ -568,43 +609,53 @@ defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
// Rotate instructions.
defm RLDCL : MDSForm_1r<30, 8,
(outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
+ "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDCR : MDSForm_1r<30, 9,
(outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD,
+ "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
(outs g8rc:$rA),
(ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
defm RLDIC : MDForm_1r<30, 2,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
(ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>;
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+}
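// Sketch of why commuting is sound at SH = 0: rlwimi then computes
//   rA = (rS & MASK(MB,ME)) | (rSi & ~MASK(MB,ME)),
// so swapping rS and rSi while complementing the mask (adjusting MB/ME)
// produces the same value; the commuting code is assumed to rewrite the
// mask accordingly.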
+
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
(outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
[]>;
} // Interpretation64Bit
} // neverHasSideEffects = 1
@@ -618,111 +669,111 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
- "lha $rD, $src", LdStLHA,
+ "lha $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
- "lwa $rD, $src", LdStLWA,
+ "lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
(aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
- "lhax $rD, $src", LdStLHA,
+ "lhax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
- "lwax $rD, $src", LdStLHA,
+ "lwax $rD, $src", IIC_LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1 in {
def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
- "lwa $rD, $src", LdStLWA, []>, isPPC64,
+ "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked;
def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
- "lwax $rD, $src", LdStLHA, []>, isPPC64,
+ "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64,
PPC970_DGroup_Cracked;
} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memri:$addr),
- "lhau $rD, $addr", LdStLHAU,
+ "lhau $rD, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLHAU,
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwaux $rD, $addr", LdStLHAU,
+ "lwaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
}
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStLoad,
+ "lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStLoad,
+ "lhz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStLoad,
+ "lwz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStLoad,
+ "lbzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStLoad,
+ "lhzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStLoad,
+ "lwzx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoadUpd,
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoadUpd,
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoadUpd,
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoadUpd,
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoadUpd,
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoadUpd,
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -733,7 +784,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
- "ld $rD, $src", LdStLD,
+ "ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
// The following three definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
@@ -751,33 +802,27 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1 in {
-let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
- "ld 2, 8($reg)", LdStLD,
- [(PPCload_toc i64:$reg)]>, isPPC64;
-
-let RST = 2, DS = 10, RA = 1 in
-def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
- "ld 2, 40(1)", LdStLD,
- [(PPCtoc_restore)]>, isPPC64;
-}
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
+ "ld 2, $src", IIC_LdStLD,
+ [(PPCload_toc ixaddr:$src)]>, isPPC64;
+
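// Typical use (illustrative): after an indirect call in the 64-bit SVR4
// ABI, the caller reloads its TOC pointer from the stack save slot, e.g.
//   ld 2, 40(1)
// which this single parameterized definition now encodes, covering both
// of the removed hard-coded forms above.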
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
- "ldx $rD, $src", LdStLD,
+ "ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
- "ldbrx $rD, $src", LdStLoad,
+ "ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
let mayLoad = 1, neverHasSideEffects = 1 in {
def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
- "ldu $rD, $addr", LdStLDU,
+ "ldu $rD, $addr", IIC_LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "ldux $rD, $addr", LdStLDU,
+ "ldux $rD, $addr", IIC_LdStLDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -860,78 +905,79 @@ def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
isPPC64;
let PPC970_Unit = 2 in {
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
- "stb $rS, $src", LdStStore,
+ "stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
- "sth $rS, $src", LdStStore,
+ "sth $rS, $src", IIC_LdStStore,
[(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
- "stw $rS, $src", LdStStore,
+ "stw $rS, $src", IIC_LdStStore,
[(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStStore,
+ "stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStStore,
+ "sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStStore,
+ "stwx $rS, $dst", IIC_LdStStore,
[(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
- "std $rS, $dst", LdStSTD,
+ "std $rS, $dst", IIC_LdStSTD,
[(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdx $rS, $dst", LdStSTD,
+ "stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdbrx $rS, $dst", LdStStore,
+ "stdbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1 in {
-let Interpretation64Bit = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", LdStSTDU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
+ "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", LdStSTDU, []>,
+ "stdux $rS, $dst", IIC_LdStSTDUX, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
}
@@ -966,29 +1012,29 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
let PPC970_Unit = 3, neverHasSideEffects = 1,
Uses = [RM] in { // FPU Operations.
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfid", "$frD, $frB", FPGeneral,
+ "fcfid", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctid", "$frD, $frB", FPGeneral,
+ "fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctidz", "$frD, $frB", FPGeneral,
+ "fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfidu", "$frD, $frB", FPGeneral,
+ "fcfidu", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfids", "$frD, $frB", FPGeneral,
+ "fcfids", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfidus", "$frD, $frB", FPGeneral,
+ "fcfidus", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiduz", "$frD, $frB", FPGeneral,
+ "fctiduz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwuz", "$frD, $frB", FPGeneral,
+ "fctiwuz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
@@ -1006,6 +1052,14 @@ def : Pat<(i64 (anyext i32:$in)),
def : Pat<(i32 (trunc i64:$in)),
(EXTRACT_SUBREG $in, sub_32)>;
+// Implement the 'not' operation with the NOR instruction. (We could use the
+// default xori pattern, but nor has lower latency on some cores, such as
+// the A2.)
+def i64not : OutPatFrag<(ops node:$in),
+ (NOR8 $in, $in)>;
+def : Pat<(not i64:$in),
+ (i64not $in)>;
+
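(As a quick sanity check, here is a minimal standalone C++ sketch, not part
of the patch, of the identity the i64not pattern relies on: with both inputs
equal, NOR degenerates to bitwise NOT.)

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // NOR as the PPC 'nor' instruction computes it: complement of the OR.
    static uint64_t nor64(uint64_t a, uint64_t b) { return ~(a | b); }

    int main() {
      // nor(x, x) == ~(x | x) == ~x, which is what (NOR8 $in, $in) emits.
      for (uint64_t x : {0x0ULL, 0x1ULL, 0xdeadbeefULL, ~0x0ULL})
        assert(nor64(x, x) == ~x);
      return 0;
    }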
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
(LBZ8 iaddr:$src)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index a55abe373556..b271b5d5aa21 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,111 +22,160 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
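(Reading across the diff, the new third argument that replaces the old bool
is a three-valued shuffle kind: 0 for a normal two-input shuffle, 1 for the
unary form, 2 for the operand-swapped form used on little-endian. Below is a
simplified standalone sketch of the vpkuhum check under that assumption,
using big-endian byte numbering throughout; the in-tree
PPC::isVPKUHUMShuffleMask additionally accounts for the DAG's little-endian
lane numbering.)

    // vpkuhum keeps the odd-numbered byte of each halfword, i.e. result
    // byte i comes from byte 2*i+1 of the 32-byte concatenation A||B.
    // Kind: 0 = two distinct inputs, 1 = unary (both inputs are the same
    // vector, so wanted indices reduce into the first half), 2 = inputs
    // swapped, so the wanted byte moves to the other half of A||B.
    static bool isVPKUHUMMaskBE(const int Mask[16], unsigned Kind) {
      for (unsigned i = 0; i != 16; ++i) {
        if (Mask[i] < 0)
          continue;                    // undef lanes always match
        unsigned Want = 2 * i + 1;
        if (Kind == 1)
          Want &= 15;                  // unary: only input A exists
        else if (Kind == 2)
          Want = (Want + 16) & 31;     // swapped: exchange the two halves
        if (static_cast<unsigned>(Mask[i]) != Want)
          return false;
      }
      return true;
    }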
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_swapped_get_imm>;
+
+
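(vsldoi itself concatenates its inputs and extracts a 16-byte window, which
is why a little-endian mask can be matched by swapping the operands and
adjusting the shift. A minimal big-endian sketch with an assumed V16 type,
not the in-tree code:)

    #include <array>
    #include <cstdint>

    using V16 = std::array<uint8_t, 16>;

    // vsldoi vD, vA, vB, SH: vD is bytes SH..SH+15 of the 32-byte
    // concatenation vA || vB (big-endian byte order, 0 <= SH < 16).
    static V16 vsldoi(const V16 &A, const V16 &B, unsigned SH) {
      V16 R{};
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Src = i + SH;
        R[i] = Src < 16 ? A[Src] : B[Src - 16];
      }
      return R;
    }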
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -164,7 +213,7 @@ def vecspltisw : PatLeaf<(build_vector), [{
// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
@@ -172,7 +221,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
@@ -180,14 +229,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
: VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
[(set OutTy:$vD,
(IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
@@ -195,7 +244,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
@@ -203,13 +252,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
: VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
: VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set v4f32:$vD, (IntID v4f32:$vB))]>;
// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
@@ -217,128 +266,130 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
: VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), VecFP,
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStLoad /*FIXME*/, []>,
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStLoad /*FIXME*/, []>,
+ "dssall", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
- "mfvscr $vD", LdStStore,
+ "mfvscr $vD", IIC_LdStStore,
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
- "mtvscr $vB", LdStLoad,
+ "mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
- "lvebx $vD, $src", LdStLoad,
+ "lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src),
- "lvehx $vD, $src", LdStLoad,
+ "lvehx $vD, $src", IIC_LdStLoad,
[(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src),
- "lvewx $vD, $src", LdStLoad,
+ "lvewx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
- "lvx $vD, $src", LdStLoad,
+ "lvx $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
- "lvxl $vD, $src", LdStLoad,
+ "lvxl $vD, $src", IIC_LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src),
- "lvsl $vD, $src", LdStLoad,
+ "lvsl $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
- "lvsr $vD, $src", LdStLoad,
+ "lvsr $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvebx $rS, $dst", LdStStore,
+ "stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvehx $rS, $dst", LdStStore,
+ "stvehx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvewx $rS, $dst", LdStStore,
+ "stvewx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvx $rS, $dst", LdStStore,
+ "stvx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvxl $rS, $dst", LdStStore,
+ "stvxl $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
+let isCommutable = 1 in {
def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD,
(fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
// FIXME: The fma+fneg pattern won't match because fneg is not legal.
def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
[(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
- (fneg v4f32:$vB))))]>;
+ (fneg v4f32:$vB))))]>;
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
v8i16>;
def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+} // isCommutable
def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
v4i32, v4i32, v16i8>;
@@ -346,23 +397,24 @@ def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
- "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
[(set v16i8:$vD,
(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
+let isCommutable = 1 in {
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddfp $vD, $vA, $vB", VecFP,
+ "vaddfp $vD, $vA, $vB", IIC_VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddubm $vD, $vA, $vB", VecGeneral,
+ "vaddubm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduhm $vD, $vA, $vB", VecGeneral,
+ "vadduhm $vD, $vA, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduwm $vD, $vA, $vB", VecGeneral,
+ "vadduwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
@@ -372,30 +424,31 @@ def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
-
-
+} // isCommutable
+
+let isCommutable = 1 in
def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vand $vD, $vA, $vB", VecFP,
+ "vand $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vandc $vD, $vA, $vB", VecFP,
+ "vandc $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (and v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfsx $vD, $vB, $UIMM", VecFP,
+ "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfux $vD, $vB, $UIMM", VecFP,
+ "vcfux $vD, $vB, $UIMM", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctsxs $vD, $vB, $UIMM", VecFP,
+ "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctuxs $vD, $vB, $UIMM", VecFP,
+ "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
@@ -404,25 +457,26 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
// to floating-point (sint_to_fp/uint_to_fp) conversions.
let isCodeGenOnly = 1, VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfsx $vD, $vB, 0", VecFP,
+ "vcfsx $vD, $vB, 0", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctuxs $vD, $vB, 0", VecFP,
+ "vctuxs $vD, $vB, 0", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfux $vD, $vB, 0", VecFP,
+ "vcfux $vD, $vB, 0", IIC_VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctsxs $vD, $vB, 0", VecFP,
+ "vctsxs $vD, $vB, 0", IIC_VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+let isCommutable = 1 in {
def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
@@ -444,24 +498,25 @@ def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
+} // isCommutable
def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghb $vD, $vA, $vB", VecFP,
+ "vmrghb $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghh $vD, $vA, $vB", VecFP,
+ "vmrghh $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghw $vD, $vA, $vB", VecFP,
+ "vmrghw $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglb $vD, $vA, $vB", VecFP,
+ "vmrglb $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglh $vD, $vA, $vB", VecFP,
+ "vmrglh $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglw $vD, $vA, $vB", VecFP,
+ "vmrglw $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
@@ -477,6 +532,7 @@ def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
v4i32, v8i16, v4i32>;
+let isCommutable = 1 in {
def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
v8i16, v16i8>;
def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
@@ -493,6 +549,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
v8i16, v16i8>;
def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
v4i32, v8i16>;
+} // isCommutable
def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
@@ -504,16 +561,16 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubfp $vD, $vA, $vB", VecGeneral,
+ "vsubfp $vD, $vA, $vB", IIC_VecGeneral,
[(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsububm $vD, $vA, $vB", VecGeneral,
+ "vsububm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuhm $vD, $vA, $vB", VecGeneral,
+ "vsubuhm $vD, $vA, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuwm $vD, $vA, $vB", VecGeneral,
+ "vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
@@ -534,15 +591,17 @@ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vnor $vD, $vA, $vB", VecFP,
+ "vnor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
v4i32:$vB)))]>;
+let isCommutable = 1 in {
def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vor $vD, $vA, $vB", VecFP,
+ "vor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vxor $vD, $vA, $vB", VecFP,
+ "vxor $vD, $vA, $vB", IIC_VecFP,
[(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+} // isCommutable
def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
@@ -556,15 +615,15 @@ def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltb $vD, $vB, $UIMM", VecPerm,
+ "vspltb $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vsplth $vD, $vB, $UIMM", VecPerm,
+ "vsplth $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltw $vD, $vB, $UIMM", VecPerm,
+ "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
@@ -580,13 +639,13 @@ def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisb $vD, $SIMM", VecPerm,
+ "vspltisb $vD, $SIMM", IIC_VecPerm,
[(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltish $vD, $SIMM", VecPerm,
+ "vspltish $vD, $SIMM", IIC_VecPerm,
[(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisw $vD, $SIMM", VecPerm,
+ "vspltisw $vD, $SIMM", IIC_VecPerm,
[(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
@@ -601,13 +660,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuhum $vD, $vA, $vB", VecFP,
+ "vpkuhum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuwum $vD, $vA, $vB", VecFP,
+ "vpkuwum $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
(vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
@@ -631,10 +690,12 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
@@ -676,24 +737,24 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllZerosV))]>;
def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v8i16:$vD, (v8i16 immAllZerosV))]>;
def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", VecFP,
+ "vxor $vD, $vD, $vD", IIC_VecFP,
[(set v4i32:$vD, (v4i32 immAllZerosV))]>;
let IMM=-1 in {
def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllOnesV))]>;
def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v8i16:$vD, (v8i16 immAllOnesV))]>;
def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", VecFP,
+ "vspltisw $vD, -1", IIC_VecFP,
[(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
}
@@ -761,6 +822,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
(VPKUHUM $vA, $vA)>;
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+ (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUHUM $vB, $vA)>;
+
// Match vmrg*(x,x)
def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
(VMRGLB $vA, $vA)>;
@@ -775,6 +846,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
(VMRGHW $vA, $vA)>;
+// Match vmrg*(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHW $vB, $vA)>;
+
// Logical Operations
def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 29233d49148d..1e4396cd1017 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -14,6 +14,8 @@
class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
: Instruction {
field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+ let Size = 4;
bit PPC64 = 0; // Default value, override with isPPC64
@@ -67,6 +69,8 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: Instruction {
field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ let Size = 8;
bit PPC64 = 0; // Default value, override with isPPC64
@@ -109,7 +113,7 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
bits<3> CR;
bits<14> BD;
@@ -135,7 +139,7 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<14> BD;
let Inst{6-10} = bo;
@@ -147,7 +151,7 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
class BForm_3<bits<6> opcode, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
- : I<opcode, OOL, IOL, asmstr, BrB> {
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<5> BO;
bits<5> BI;
bits<14> BD;
@@ -159,6 +163,19 @@ class BForm_3<bits<6> opcode, bit aa, bit lk,
let Inst{31} = lk;
}
+class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
// 1.7.3 SC-Form
class SCForm<bits<6> opcode, bits<1> xo,
dag OOL, dag IOL, string asmstr, InstrItinClass itin,
@@ -258,6 +275,15 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0;
}
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = R;
+ let B = R;
+ let C = 0;
+}
+
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -334,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
let Inst{30-31} = xo;
}
-class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> RST;
- bits<14> DS;
- bits<5> RA;
-
- let Pattern = pattern;
-
- let Inst{6-10} = RST;
- let Inst{11-15} = RA;
- let Inst{16-29} = DS;
- let Inst{30-31} = xo;
-}
// 1.7.6 X-Form
class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
@@ -567,6 +579,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0;
+ let Inst{14-15} = D;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
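(The XX* forms carry 6-bit VSX register numbers: the low five bits occupy
the classic register fields and the sixth bit is split off to the end of the
word. A hedged standalone sketch of how XX3Form's fields pack, using
PowerPC's MSB-first bit numbering; assumed helper functions, not the in-tree
encoder:)

    #include <cstdint>

    // Place `val` into PowerPC-numbered bits msb..lsb of a 32-bit word
    // (bit 0 is the most significant, so bit b lands at shift 31-b).
    static uint32_t field(uint32_t val, unsigned msb, unsigned lsb) {
      unsigned width = lsb - msb + 1;
      return (val & ((1u << width) - 1)) << (31 - lsb);
    }

    // XX3Form: opcode, XT{4-0}, XA{4-0}, XB{4-0}, xo, then XA{5} XB{5} XT{5}.
    static uint32_t encodeXX3(unsigned opcode, unsigned xo,
                              unsigned XT, unsigned XA, unsigned XB) {
      return field(opcode,   0,  5) |
             field(XT & 31,  6, 10) |
             field(XA & 31, 11, 15) |
             field(XB & 31, 16, 20) |
             field(xo,      21, 28) |
             field(XA >> 5, 29, 29) |
             field(XB >> 5, 30, 30) |
             field(XT >> 5, 31, 31);
    }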
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0;
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC;
+ let Inst{22-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo;
+ let Inst{28} = XC{5};
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,6 +843,12 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
let BH = 0;
}
+class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BH = 0;
+}
class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 80bc27a95765..9bac91d7d412 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,26 +18,32 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-instr-info"
+
#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
-using namespace llvm;
-
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
cl::desc("Disable analysis for CTR loops"));
@@ -45,26 +51,35 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
+cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
+cl::Hidden);
+
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
-PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl()) {}
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return new PPCScoreboardHazardRecognizer(II, DAG);
+ const InstrItineraryData *II =
+ &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -72,17 +87,72 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
+ return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
- assert(TM.getInstrInfo() && "No InstrInfo?");
+ assert(DAG->TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*DAG);
+ }
+
+ return new ScoreboardHazardRecognizer(II, DAG);
+}
+
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+ UseMI, UseIdx);
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool IsRegCR;
+ if (TRI->isVirtualRegister(Reg)) {
+ const MachineRegisterInfo *MRI =
+ &DefMI->getParent()->getParent()->getRegInfo();
+ IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+ } else {
+ IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+ PPC::CRBITRCRegClass.contains(Reg);
+ }
- return new PPCHazardRecognizer970(TM);
+ if (UseMI->isBranch() && IsRegCR) {
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+
+    // On some cores, there is an additional delay between writing to a
+    // condition register and using it from a branch.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ switch (Directive) {
+ default: break;
+ case PPC::DIR_7400:
+ case PPC::DIR_750:
+ case PPC::DIR_970:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ Latency += 2;
+ break;
+ }
}
- return new PPCScoreboardHazardRecognizer(II, DAG);
+ return Latency;
}
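(The net effect of the switch above is a small additive adjustment; a hedged
restatement with assumed predicate names, not the in-tree code:)

    // If the defining instruction writes a condition register and the user
    // is a branch, the cores listed in the switch pay about two extra
    // cycles between the CR write and the branch that consumes it.
    static int adjustCRToBranchLatency(int BaseLatency, bool DefWritesCR,
                                       bool UseIsBranch, bool CoreHasDelay) {
      if (DefWritesCR && UseIsBranch && CoreHasDelay)
        return BaseLatency + 2;
      return BaseLatency;
    }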
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
@@ -110,7 +180,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::LFS:
case PPC::LFD:
case PPC::RESTORE_CR:
+ case PPC::RESTORE_CRBIT:
case PPC::LVX:
+ case PPC::LXVD2X:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -134,7 +206,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::STFS:
case PPC::STFD:
case PPC::SPILL_CR:
+ case PPC::SPILL_CRBIT:
case PPC::STVX:
+ case PPC::STXVD2X:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -156,12 +230,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
- MI->getOpcode() != PPC::RLWIMIo)
+ MI->getOpcode() != PPC::RLWIMIo &&
+ MI->getOpcode() != PPC::RLWIMI8 &&
+ MI->getOpcode() != PPC::RLWIMI8o)
return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
- return 0;
+ return nullptr;
// If we have a zero rotate count, we have:
// M = mask(MB,ME)
@@ -174,6 +250,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
+ unsigned SubReg1 = MI->getOperand(1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(2).getSubReg();
bool Reg1IsKill = MI->getOperand(1).isKill();
bool Reg2IsKill = MI->getOperand(2).isKill();
bool ChangeReg0 = false;
@@ -183,6 +261,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Must be two address instruction!
assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
+ assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
Reg2IsKill = false;
ChangeReg0 = true;
}
@@ -203,10 +282,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
.addImm((MB-1) & 31);
}
- if (ChangeReg0)
+ if (ChangeReg0) {
MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(0).setSubReg(SubReg2);
+ }
MI->getOperand(2).setReg(Reg1);
MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setSubReg(SubReg1);
+ MI->getOperand(1).setSubReg(SubReg2);
MI->getOperand(2).setIsKill(Reg1IsKill);
MI->getOperand(1).setIsKill(Reg2IsKill);
@@ -216,13 +299,38 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
+bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ // For VSX A-Type FMA instructions, it is the first two operands that can be
+ // commuted, however, because the non-encoded tied input operand is listed
+ // first, the operands to swap are actually the second and third.
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+
+ SrcOpIdx1 = 2;
+ SrcOpIdx2 = 3;
+ return true;
+}
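(Why operands 2 and 3 are the right pair: an A-type FMA computes T = A*B + T,
and the two multiplicands commute exactly since IEEE-754 multiplication is
commutative, while operand 1 is the tied accumulator. A trivial standalone
check, not the in-tree code:)

    #include <cassert>

    // Swapping the multiplicands of T = A*B + T cannot change the result,
    // so the two corresponding MI operands may be commuted freely.
    static double fmaA(double T, double A, double B) { return A * B + T; }

    int main() {
      assert(fmaA(1.5, 2.0, 4.0) == fmaA(1.5, 4.0, 2.0));
      return 0;
    }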
+
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
+ // This function is used for scheduling, and the nop wanted here is the type
+ // that terminates dispatch groups on the POWER cores.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Opcode;
+ switch (Directive) {
+ default: Opcode = PPC::NOP; break;
+ case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+ case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+  case PPC::DIR_PWR8:
+    // FIXME: Update this when the P8 instruction-scheduling model is ready.
+    Opcode = PPC::NOP_GT_PWR7; break;
+ }
+
DebugLoc DL;
- BuildMI(MBB, MI, DL, get(PPC::NOP));
+ BuildMI(MBB, MI, DL, get(Opcode));
}
-
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -230,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
@@ -263,6 +371,22 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
return false;
+ } else if (LastInst->getOpcode() == PPC::BC) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCn) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
} else if (LastInst->getOpcode() == PPC::BDNZ8 ||
LastInst->getOpcode() == PPC::BDNZ) {
if (!LastInst->getOperand(0).isMBB())
@@ -310,6 +434,26 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMBB();
return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BCn &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
} else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
SecondLastInst->getOpcode() == PPC::BDNZ) &&
LastInst->getOpcode() == PPC::B) {
@@ -367,6 +511,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
--I;
}
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 0;
@@ -379,6 +524,7 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin()) return 1;
--I;
if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
return 1;
@@ -398,19 +544,23 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// One-way branch.
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
return 1;
}
@@ -419,9 +569,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
BuildMI(&MBB, DL, get(Cond[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
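
The hunks above extend PPC's branch-condition encoding: besides the existing
{BCC predicate, CR register} pair, a condition may now be {PRED_BIT_SET or
PRED_BIT_UNSET, CR bit register}, which InsertBranch lowers to BC/BCn. A
minimal sketch of how a client would build such a condition (insertCRBitBranch
and CRBitReg are illustrative names, not part of the patch):

    static void insertCRBitBranch(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
                                  MachineBasicBlock *TBB, unsigned CRBitReg,
                                  bool BranchOnSet, DebugLoc DL) {
      SmallVector<MachineOperand, 2> Cond;
      Cond.push_back(MachineOperand::CreateImm(
          BranchOnSet ? PPC::PRED_BIT_SET : PPC::PRED_BIT_UNSET));
      Cond.push_back(MachineOperand::CreateReg(CRBitReg, /*isDef=*/false));
      TII->InsertBranch(MBB, TBB, /*FBB=*/nullptr, Cond, DL); // emits BC or BCn
    }
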
@@ -431,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!TM.getSubtargetImpl()->hasISEL())
+ if (!Subtarget.hasISEL())
return false;
if (Cond.size() != 2)
@@ -475,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(TM.getSubtargetImpl()->hasISEL() &&
+ assert(Subtarget.hasISEL() &&
"Cannot insert select on target without ISEL support");
// Get the register classes.
@@ -506,6 +660,8 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
+ case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
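
The two PRED_BIT cases just added use SubIdx == 0 because the condition
register already is the single bit to test, and PRED_BIT_UNSET inverts the
select simply by swapping the copied operands. A hedged sketch of the ISEL
that insertSelect then emits (names follow the surrounding code; with
SubIdx == 0 no subregister index is attached to the condition operand):

    BuildMI(MBB, MI, dl, get(PPC::ISEL), DestReg)
        .addReg(FirstReg)           // copied when the bit is set
        .addReg(SecondReg)          // copied when the bit is clear
        .addReg(Cond[1].getReg());  // the CR bit itself
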
@@ -534,6 +690,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSLRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSHRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSLRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSHRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -545,6 +742,18 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // There are two different ways this can be done:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ // We'll always use xxlor here, because in practically all cases where
+ // copies are generated, they are close enough to some use that the
+ // lower-latency form is preferable.
+ Opc = PPC::XXLOR;
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::XXLORf;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
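
The promotion block at the top of this copyPhysReg hunk exists because, even
after VSX copy legalization, a physical copy can still pair the 64-bit (F8RC)
or 128-bit Altivec (VRRC) view of a lane with a VSX subclass register. A
hedged sketch of the rule being applied (promoteToVSRC is a made-up helper;
getMatchingSuperReg returns 0 when there is no containing register):

    static unsigned promoteToVSRC(const TargetRegisterInfo *TRI, unsigned Reg,
                                  bool Is128Bit) {
      // Map the register onto the full VSX register containing it, so a
      // single xxlor (or xxlorf) can perform the copy.
      return TRI->getMatchingSuperReg(Reg, Is128Bit ? PPC::sub_128 : PPC::sub_64,
                                      &PPC::VSRCRegClass);
    }
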
@@ -599,47 +808,31 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
- // backend currently only uses CR1EQ as an individual bit, this should
- // not cause any bug. If we need other uses of CR bits, the following
- // code may be invalid.
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
-
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
.addReg(SrcReg,
@@ -717,42 +910,24 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
-
- unsigned Reg = 0;
- if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
- DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
- DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
- DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
- DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
- DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
- DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
- DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
- DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
-
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CRBIT), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_VRSAVE),
@@ -866,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
} else {
ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
@@ -933,13 +1108,21 @@ bool PPCInstrInfo::PredicateInstruction(
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
- } else {
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI->setDesc(get(PPC::BCLR));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(PPC::BCLRn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else {
+ MI->setDesc(get(PPC::BCCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
}
@@ -947,10 +1130,26 @@ bool PPCInstrInfo::PredicateInstruction(
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
MI->RemoveOperand(0);
@@ -969,9 +1168,24 @@ bool PPCInstrInfo::PredicateInstruction(
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
- MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
- (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ bool isPPC64 = Subtarget.isPPC64();
+
+ if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+ (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) :
+ (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) :
+ (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(Pred[0].getImm())
.addReg(Pred[1].getReg());
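
PredicateInstruction now recognizes the same three condition encodings as the
branch-analysis code: the CTR decrement forms, the CR-bit forms
(PRED_BIT_SET/UNSET), and the general BCC-style predicate. A hedged sketch of
predicating an unconditional branch on a single bit (MI and CRBitReg are
hypothetical):

    SmallVector<MachineOperand, 2> Pred;
    Pred.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
    Pred.push_back(MachineOperand::CreateReg(CRBitReg, /*isDef=*/false));
    TII->PredicateInstruction(MI, Pred); // B -> BCn, BLR -> BCLRn, etc.
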
@@ -1115,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// for equality checks (as those don't depend on the sign). On PPC64,
// we are restricted to equality for unsigned 64-bit comparisons and for
// signed 32-bit comparisons the applicability is more restricted.
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
@@ -1156,8 +1370,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
if (equalityOnly) {
// We need to check the uses of the condition register in order to reject
// non-equality comparisons.
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
- IE = MRI->use_end(); I != IE; ++I) {
+    for (MachineRegisterInfo::use_instr_iterator I = MRI->use_instr_begin(CRReg),
+         IE = MRI->use_instr_end(); I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
@@ -1179,8 +1393,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
I != EL; ++I) {
bool FoundUse = false;
- for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
- JE = MRI->use_end(); J != JE; ++J)
+      for (MachineRegisterInfo::use_instr_iterator J = MRI->use_instr_begin(CRReg),
+           JE = MRI->use_instr_end(); J != JE; ++J)
if (&*J == &*I) {
FoundUse = true;
break;
@@ -1193,10 +1407,10 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
- MI = NULL;
+ MI = nullptr;
// FIXME: Conservatively refuse to convert an instruction which isn't in the
// same BB as the comparison. This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
@@ -1289,15 +1503,16 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
}
if (ShouldSwap)
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
- IE = MRI->use_end(); I != IE; ++I) {
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
assert((!equalityOnly ||
Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
"Invalid predicate for equality-only optimization");
- PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
PPC::getSwappedPredicate(Pred)));
} else if (UseMI->getOpcode() == PPC::ISEL ||
UseMI->getOpcode() == PPC::ISEL8) {
@@ -1310,7 +1525,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
else if (NewSubReg == PPC::sub_gt)
NewSubReg = PPC::sub_lt;
- SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+ SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
NewSubReg));
} else // We need to abort on a user we don't understand.
return false;
@@ -1322,7 +1537,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
CmpInstr->eraseFromParent();
MachineBasicBlock::iterator MII = MI;
- BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+ BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
get(TargetOpcode::COPY), CRReg)
.addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
@@ -1367,26 +1582,508 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
/// instruction may be. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case PPC::INLINEASM: { // Inline Asm: Variable size.
+ unsigned Opcode = MI->getOpcode();
+
+ if (Opcode == PPC::INLINEASM) {
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
- }
- case PPC::PROLOG_LABEL:
- case PPC::EH_LABEL:
- case PPC::GC_LABEL:
- case PPC::DBG_VALUE:
- return 0;
- case PPC::BL8_NOP:
- case PPC::BLA8_NOP:
- return 8;
- default:
- return 4; // PowerPC instructions are all 4 bytes
+ } else {
+ const MCInstrDesc &Desc = get(Opcode);
+ return Desc.getSize();
}
}
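
With the switch gone, every fixed-size instruction now reports its byte count
through the MCInstrDesc Size field that TableGen computes, and inline asm
remains the only variable-length case. A hedged sketch of a typical client
summing the size of a block:

    static unsigned blockSizeInBytes(const PPCInstrInfo &TII,
                                     const MachineBasicBlock &MBB) {
      unsigned Size = 0;
      for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I)
        Size += TII.GetInstSizeInBytes(&*I); // 4 for most PPC instructions
      return Size;
    }
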
#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace {
+  // PPCVSXFMAMutate pass - Where profitable, rewrite the default A-type VSX
+  // FMA (whose addend operand is tied to, and clobbered by, the result) into
+  // the M-type form so that the copy feeding the addend can be eliminated.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations.
+ bool OtherUsers = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J)
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+
+ if (OtherUsers)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // In order to replace the addend here with the source of the copy,
+ // it must still be live here.
+ if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
+ continue;
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned AddReg = AddendMI->getOperand(1).getReg();
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.setReg(KilledProdReg);
+ UseMO.setSubReg(KilledProdSubReg);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+    // If we don't have VSX, there is nothing to do.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
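
The mutation depends on seeing the addend COPY that register coalescing
leaves behind, so it must run after the coalescer and before register
allocation. A hedged sketch of how the pass would be wired into the PPC
pipeline (PPCPassConfig::addPreRegAlloc is assumed here, not shown in this
patch):

    bool PPCPassConfig::addPreRegAlloc() {
      // Insert the FMA mutation right after register coalescing.
      insertPass(&RegisterCoalescerID, &PPCVSXFMAMutateID);
      return false;
    }
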
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX on the subtarget, don't do anything.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
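
Both directions keep the original COPY and retarget only one of its ends, so
that afterwards both ends name VSX-class registers. A hedged before/after
illustration of the to-VSX direction, with made-up virtual registers:

    // before:  %vsx<def> = COPY %f8                       ; F8RC -> VSRC
    // after:   %tmp<def> = SUBREG_TO_REG 1, %f8, sub_64   ; %tmp in VSLRC
    //          %vsx<def> = COPY %tmp                      ; VSX -> VSX
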
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
+
+namespace llvm {
+ void initializePPCVSXCopyCleanupPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
+ // registers (mostly because the ABI code still places all values into the
+ // "traditional" floating-point and vector registers). Remove them here.
+ struct PPCVSXCopyCleanup : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ SmallVector<MachineInstr *, 4> ToDelete;
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (MI->getOpcode() == PPC::XXLOR &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
+ ToDelete.push_back(MI);
+ }
+
+ if (!ToDelete.empty())
+ Changed = true;
+
+ for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
+ DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
+ ToDelete[i]->eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+    // If we don't have VSX, don't bother doing anything here.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
+ "PowerPC VSX Copy Cleanup", false, false)
+
+char PPCVSXCopyCleanup::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
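
Because copyPhysReg uses xxlor for VSX-to-VSX moves, a self copy surfaces as
"xxlor vN, vN, vN", which is a no-op. A hedged sketch of the predicate the
scan above applies (isVSXSelfCopy is an illustrative helper):

    static bool isVSXSelfCopy(const MachineInstr &MI) {
      return MI.getOpcode() == PPC::XXLOR &&
             MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
             MI.getOperand(0).getReg() == MI.getOperand(2).getReg();
    }
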
+
+#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
@@ -1428,7 +2125,7 @@ protected:
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
- // branch with a blr.
+ // branch with a blr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -1440,7 +2137,7 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
.addImm(J->getOperand(0).getImm())
.addReg(J->getOperand(1).getReg());
MachineBasicBlock::iterator K = J--;
@@ -1449,6 +2146,20 @@ protected:
++NumBCLR;
continue;
}
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ?
+ PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg());
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
} else if (J->isBranch()) {
if (J->isIndirectBranch()) {
if (ReturnMBB.hasAddressTaken())
@@ -1470,7 +2181,7 @@ protected:
if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
OtherReference = true;
- // Predecessors are stored in a vector and can't be removed here.
+ // Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
PredToRemove.push_back(*PI);
}
@@ -1501,7 +2212,7 @@ protected:
}
public:
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
@@ -1513,7 +2224,7 @@ public:
return Changed;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
@@ -1521,7 +2232,7 @@ public:
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -1533,4 +2244,3 @@ INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
char PPCEarlyReturn::ID = 0;
FunctionPass*
llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
-
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index f140c41a2a89..83f14c6cf214 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,7 @@ enum PPC970_Unit {
class PPCInstrInfo : public PPCGenInstrInfo {
- PPCTargetMachine &TM;
+ PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
bool StoreRegToStackSlot(MachineFunction &MF,
@@ -80,142 +80,154 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
public:
- explicit PPCInstrInfo(PPCTargetMachine &TM);
+ explicit PPCInstrInfo(PPCSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+ const PPCRegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAG *DAG) const override;
+
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const override {
+ return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
+ UseNode, UseIdx);
+ }
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const;
+ unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
// commuteInstruction - We can commute rlwimi instructions, but only if the
// rotate amt is zero. We also have to munge the immediates a bit.
- virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+ MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
+
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
- virtual void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
// Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
// Select analysis.
- virtual bool canInsertSelect(const MachineBasicBlock&,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned, unsigned, int&, int&, int&) const;
- virtual void insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const;
+ bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const override;
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const override {
return true;
}
- virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumT, unsigned ExtraT,
- MachineBasicBlock &FMBB,
- unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const;
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const override;
- virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- const BranchProbability
- &Probability) const {
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const override {
return true;
}
- virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const {
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override {
return false;
}
// Predication support.
- bool isPredicated(const MachineInstr *MI) const;
+ bool isPredicated(const MachineInstr *MI) const override;
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
- virtual
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
+ const SmallVectorImpl<MachineOperand> &Pred) const override;
- virtual
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
- virtual bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
- virtual bool isPredicable(MachineInstr *MI) const;
+ bool isPredicable(MachineInstr *MI) const override;
// Comparison optimization.
- virtual bool analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const;
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
- virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
- unsigned SrcReg, unsigned SrcReg2,
- int Mask, int Value,
- const MachineRegisterInfo *MRI) const;
+ bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const override;
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index fc29c69642bf..42b740f4fa46 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
+def tocentry32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
@@ -99,6 +102,8 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
+
def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
[SDNPMayLoad]>;
@@ -139,9 +144,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
-def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -288,6 +290,12 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit
+ // zero extended field.
+ return isUInt<32>(Imm);
+}]>;
+
// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
// restricted memrix (4-aligned) constants are alignment sensitive. If these
// offsets are hidden behind TOC entries then the values of the lower-order
@@ -404,6 +412,14 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -411,6 +427,7 @@ def PPCS5ImmAsmOperand : AsmOperandClass {
def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
let ParserMatchClass = PPCS5ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<5>";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
@@ -419,6 +436,7 @@ def PPCU5ImmAsmOperand : AsmOperandClass {
def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
let ParserMatchClass = PPCU5ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
@@ -427,6 +445,7 @@ def PPCU6ImmAsmOperand : AsmOperandClass {
def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
let ParserMatchClass = PPCU6ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<6>";
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
@@ -436,6 +455,7 @@ def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
@@ -445,6 +465,7 @@ def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
@@ -457,6 +478,7 @@ def s17imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
}
def PPCDirectBrAsmOperand : AsmOperandClass {
let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
@@ -502,6 +524,7 @@ def PPCCRBitMaskOperand : AsmOperandClass {
def crbitm: Operand<i8> {
let PrintMethod = "printcrbitm";
let EncoderMethod = "get_crbitm_encoding";
+ let DecoderMethod = "decodeCRBitMOperand";
let ParserMatchClass = PPCCRBitMaskOperand;
}
// Address operands
@@ -539,6 +562,7 @@ def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
+ let DecoderMethod = "decodeMemRIOperands";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
@@ -548,6 +572,7 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
+ let DecoderMethod = "decodeMemRIXOperands";
}
// A single-register address. This is used with the SjLj
@@ -555,6 +580,14 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc:$ptrreg);
}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
@@ -577,10 +610,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
-def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
-def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
-def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
+def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
+def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -614,20 +647,6 @@ multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
-multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmbase, string asmstr, InstrItinClass itin,
- list<dag> pattern> {
- let BaseName = asmbase in {
- def NAME : XForm_10<opcode, xo, OOL, IOL,
- !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
- pattern>, RecFormRel;
- let Defs = [CR0] in
- def o : XForm_10<opcode, xo, OOL, IOL,
- !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT, RecFormRel;
- }
-}
-
multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -888,30 +907,63 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
+ [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
+ def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
+ [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
+ def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ f4rc:$T, f4rc:$F), "#SELECT_F4",
+ [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
+ def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ f8rc:$T, f8rc:$F), "#SELECT_F8",
+ [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ vrrc:$T, vrrc:$F), "#SELECT_VRRC",
+ [(set v4i32:$dst,
+ (select i1:$cond, v4i32:$T, v4i32:$F))]>;
}
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-let mayStore = 1 in
+let mayStore = 1 in {
def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
+def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+ "#SPILL_CRBIT", []>;
+}
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
-let mayLoad = 1 in
+let mayLoad = 1 in {
def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
+def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+ "#RESTORE_CRBIT", []>;
+}
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
- def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
- let isCodeGenOnly = 1 in
- def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>;
+ }
}
}
@@ -922,10 +974,10 @@ let Defs = [LR] in
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[(br bb:$dst)]>;
def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
- "ba $dst", BrB, []>;
+ "ba $dst", IIC_BrB, []>;
}
// BCC represents an arbitrary conditional branch on a predicate.
@@ -939,23 +991,39 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
let isReturn = 1, Uses = [LR, RM] in
- def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
- "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>;
+ def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>;
+ }
+
+ let isCodeGenOnly = 1 in {
+ let Pattern = [(brcond i1:$bi, bb:$dst)] in
+ def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 12, $bi, $dst">;
+
+ let Pattern = [(brcond (not i1:$bi), bb:$dst)] in
+ def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 4, $bi, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
+ "bclr 12, $bi, 0", IIC_BrB, []>;
+ def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
+ "bclr 4, $bi, 0", IIC_BrB, []>;
}
let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
- "bdzlr", BrB, []>;
+ "bdzlr", IIC_BrB, []>;
def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
- "bdnzlr", BrB, []>;
+ "bdnzlr", IIC_BrB, []>;
def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
- "bdzlr+", BrB, []>;
+ "bdzlr+", IIC_BrB, []>;
def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
- "bdnzlr+", BrB, []>;
+ "bdnzlr+", IIC_BrB, []>;
def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
- "bdzlr-", BrB, []>;
+ "bdzlr-", IIC_BrB, []>;
def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
- "bdnzlr-", BrB, []>;
+ "bdnzlr-", IIC_BrB, []>;
}
let Defs = [CTR], Uses = [CTR] in {
@@ -998,33 +1066,54 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
+ "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+
+ def BCL : BForm_4<16, 12, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 12, $bi, $dst">;
+ def BCLn : BForm_4<16, 4, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 4, $bi, $dst">;
}
}
let Uses = [CTR, RM] in {
def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
- "bctrl", BrB, [(PPCbctrl)]>,
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
Requires<[In32BitMode]>;
- let isCodeGenOnly = 1 in
- def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>;
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>;
+ }
}
let Uses = [LR, RM] in {
def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
- "blrl", BrB, []>;
+ "blrl", IIC_BrB, []>;
- let isCodeGenOnly = 1 in
- def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
- "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>;
+ let isCodeGenOnly = 1 in {
+ def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 12, $bi, 0", IIC_BrB, []>;
+ def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 4, $bi, 0", IIC_BrB, []>;
+ }
}
let Defs = [CTR], Uses = [CTR, RM] in {
def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
@@ -1054,17 +1143,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
let Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
- "bdzlrl", BrB, []>;
+ "bdzlrl", IIC_BrB, []>;
def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
- "bdnzlrl", BrB, []>;
+ "bdnzlrl", IIC_BrB, []>;
def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
- "bdzlrl+", BrB, []>;
+ "bdzlrl+", IIC_BrB, []>;
def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
- "bdnzlrl+", BrB, []>;
+ "bdnzlrl+", IIC_BrB, []>;
def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
- "bdzlrl-", BrB, []>;
+ "bdzlrl-", IIC_BrB, []>;
def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
- "bdnzlrl-", BrB, []>;
+ "bdnzlrl-", IIC_BrB, []>;
}
}
@@ -1090,19 +1179,19 @@ let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
-def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In32BitMode]>;
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>, Requires<[In32BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", BrB,
+ "b $dst", IIC_BrB,
[]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", BrB,
+ "ba $dst", IIC_BrB,
[]>;
}
@@ -1128,33 +1217,33 @@ let isBranch = 1, isTerminator = 1 in {
// System call.
let PPC970_Unit = 7 in {
def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
- "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>;
+ "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
}
// DCB* instructions.
-def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
- "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
+ IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
- "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
- "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
- "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
- "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
- "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
- "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
- "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
@@ -1242,26 +1331,26 @@ let usesCustomInserter = 1 in {
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", LdStLWARX,
+ "lwarx $rD, $src", IIC_LdStLWARX,
[(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", LdStSTWCX,
+ "stwcx. $rS, $dst", IIC_LdStSTWCX,
[(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
-def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
- "twi $to, $rA, $imm", IntTrapW, []>;
+ "twi $to, $rA, $imm", IIC_IntTrapW, []>;
def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
- "tw $to, $rA, $rB", IntTrapW, []>;
+ "tw $to, $rA, $rB", IIC_IntTrapW, []>;
def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
- "tdi $to, $rA, $imm", IntTrapD, []>;
+ "tdi $to, $rA, $imm", IIC_IntTrapD, []>;
def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
- "td $to, $rA, $rB", IntTrapD, []>;
+ "td $to, $rA, $rB", IIC_IntTrapD, []>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -1270,56 +1359,56 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStLoad,
+ "lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
- "lha $rD, $src", LdStLHA,
+ "lha $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStLoad,
+ "lhz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStLoad,
+ "lwz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
- "lfs $rD, $src", LdStLFD,
+ "lfs $rD, $src", IIC_LdStLFD,
[(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
- "lfd $rD, $src", LdStLFD,
+ "lfd $rD, $src", IIC_LdStLFD,
[(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1, neverHasSideEffects = 1 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStLoadUpd,
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStLHAU,
+ "lhau $rD, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStLoadUpd,
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStLoadUpd,
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfsu $rD, $addr", LdStLFDU,
+ "lfsu $rD, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfdu $rD, $addr", LdStLFDU,
+ "lfdu $rD, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -1327,37 +1416,37 @@ def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr
// Indexed (r+r) Loads with Update (preinc).
def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lbzux $rD, $addr", LdStLoadUpd,
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhaux $rD, $addr", LdStLHAU,
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lhzux $rD, $addr", LdStLoadUpd,
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lwzux $rD, $addr", LdStLoadUpd,
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lfsux $rD, $addr", LdStLFDU,
+ "lfsux $rD, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
- "lfdux $rD, $addr", LdStLFDU,
+ "lfdux $rD, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -1367,45 +1456,45 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStLoad,
+ "lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
- "lhax $rD, $src", LdStLHA,
+ "lhax $rD, $src", IIC_LdStLHA,
[(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStLoad,
+ "lhzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStLoad,
+ "lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", LdStLoad,
+ "lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", LdStLoad,
+ "lwbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
- "lfsx $frD, $src", LdStLFD,
+ "lfsx $frD, $src", IIC_LdStLFD,
[(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
- "lfdx $frD, $src", LdStLFD,
+ "lfdx $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwax $frD, $src", LdStLFD,
+ "lfiwax $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwzx $frD, $src", LdStLFD,
+ "lfiwzx $frD, $src", IIC_LdStLFD,
[(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
// Load Multiple
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
- "lmw $rD, $src", LdStLMW, []>;
+ "lmw $rD, $src", IIC_LdStLMW, []>;
//===----------------------------------------------------------------------===//
// PPC32 Store Instructions.
@@ -1414,38 +1503,38 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
- "stb $rS, $src", LdStStore,
+ "stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
- "sth $rS, $src", LdStStore,
+ "sth $rS, $src", IIC_LdStStore,
[(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
- "stw $rS, $src", LdStStore,
+ "stw $rS, $src", IIC_LdStStore,
[(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
- "stfs $rS, $dst", LdStSTFD,
+ "stfs $rS, $dst", IIC_LdStSTFD,
[(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
- "stfd $rS, $dst", LdStSTFD,
+ "stfd $rS, $dst", IIC_LdStSTFD,
[(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", LdStStoreUpd, []>,
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", LdStStoreUpd, []>,
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", LdStStoreUpd, []>,
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
- "stfsu $rS, $dst", LdStSTFDU, []>,
+ "stfsu $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
- "stfdu $rS, $dst", LdStSTFDU, []>,
+ "stfdu $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
@@ -1466,59 +1555,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
// Indexed (r+r) Stores.
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStStore,
+ "stbx $rS, $dst", IIC_LdStStore,
[(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStStore,
+ "sthx $rS, $dst", IIC_LdStStore,
[(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStStore,
+ "stwx $rS, $dst", IIC_LdStStore,
[(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
- "sthbrx $rS, $dst", LdStStore,
+ "sthbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
- "stwbrx $rS, $dst", LdStStore,
+ "stwbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfiwx $frS, $dst", LdStSTFD,
+ "stfiwx $frS, $dst", IIC_LdStSTFD,
[(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
- "stfsx $frS, $dst", LdStSTFD,
+ "stfsx $frS, $dst", IIC_LdStSTFD,
[(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfdx $frS, $dst", LdStSTFD,
+ "stfdx $frS, $dst", IIC_LdStSTFD,
[(store f64:$frS, xaddr:$dst)]>;
}
// Indexed (r+r) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", LdStStoreUpd, []>,
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", LdStStoreUpd, []>,
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", LdStStoreUpd, []>,
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
- "stfsux $rS, $dst", LdStSTFDU, []>,
+ "stfsux $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
- "stfdux $rS, $dst", LdStSTFDU, []>,
+ "stfdux $rS, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
}
@@ -1539,14 +1628,14 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
// Store Multiple
def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
- "stmw $rS, $dst", LdStLMW, []>;
+ "stmw $rS, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", LdStSync, []>, Requires<[IsNotBookE]>;
+ "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
let isCodeGenOnly = 1 in {
def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
- "msync", LdStSync, []>, Requires<[IsBookE]> {
+ "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
let L = 0;
}
}
@@ -1560,41 +1649,41 @@ def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
let PPC970_Unit = 1 in { // FXU Operations.
def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntSimple,
+ "addi $rD, $rA, $imm", IIC_IntSimple,
[(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
let BaseName = "addic" in {
let Defs = [CARRY] in
def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic $rD, $rA, $imm", IntGeneral,
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
[(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic. $rD, $rA, $imm", IntGeneral,
+ "addic. $rD, $rA, $imm", IIC_IntGeneral,
[]>, isDOT, RecFormRel;
}
def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
- "addis $rD, $rA, $imm", IntSimple,
+ "addis $rD, $rA, $imm", IIC_IntSimple,
[(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
let isCodeGenOnly = 1 in
def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
- "la $rD, $sym($rA)", IntGeneral,
+ "la $rD, $sym($rA)", IIC_IntGeneral,
[(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "mulli $rD, $rA, $imm", IntMulLI,
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
[(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
let Defs = [CARRY] in
def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "subfic $rD, $rA, $imm", IntGeneral,
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
[(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
- "li $rD, $imm", IntSimple,
+ "li $rD, $imm", IIC_IntSimple,
[(set i32:$rD, imm32SExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
- "lis $rD, $imm", IntSimple,
+ "lis $rD, $imm", IIC_IntSimple,
[(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
@@ -1602,154 +1691,170 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andi. $dst, $src1, $src2", IntGeneral,
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andis. $dst, $src1, $src2", IntGeneral,
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
}
def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IntSimple,
+ "ori $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IntSimple,
+ "oris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IntSimple,
+ "xori $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IntSimple,
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
-def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
+
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
[]>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
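+// Both encodings are functional no-ops (ori rX, rX, 0 writes rX back
+// unchanged); the fixed register numbers are what those cores recognize as
+// the group-terminating forms.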
+
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
- "cmpwi $crD, $rA, $imm", IntCompare>;
+ "cmpwi $crD, $rA, $imm", IIC_IntCompare>;
def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
- "cmplwi $dst, $src1, $src2", IntCompare>;
+ "cmplwi $dst, $src1, $src2", IIC_IntCompare>;
}
}
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+let isCommutable = 1 in {
defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nand", "$rA, $rS, $rB", IntSimple,
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "and", "$rA, $rS, $rB", IntSimple,
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+} // isCommutable
defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "andc", "$rA, $rS, $rB", IntSimple,
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "or", "$rA, $rS, $rB", IntSimple,
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (or i32:$rS, i32:$rB))]>;
defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nor", "$rA, $rS, $rB", IntSimple,
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+} // isCommutable
defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "orc", "$rA, $rS, $rB", IntSimple,
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "eqv", "$rA, $rS, $rB", IntSimple,
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "xor", "$rA, $rS, $rB", IntSimple,
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
[(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+} // isCommutable
defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "slw", "$rA, $rS, $rB", IntGeneral,
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral,
[(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "srw", "$rA, $rS, $rB", IntGeneral,
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral,
[(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "sraw", "$rA, $rS, $rB", IntShift,
+ "sraw", "$rA, $rS, $rB", IIC_IntShift,
[(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
let PPC970_Unit = 1 in { // FXU Operations.
let neverHasSideEffects = 1 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
- "srawi", "$rA, $rS, $SH", IntShift,
+ "srawi", "$rA, $rS, $SH", IIC_IntShift,
[(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
- "cntlzw", "$rA, $rS", IntGeneral,
+ "cntlzw", "$rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctlz i32:$rS))]>;
defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
- "extsb", "$rA, $rS", IntSimple,
+ "extsb", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
- "extsh", "$rA, $rS", IntSimple,
+ "extsh", "$rA, $rS", IIC_IntSimple,
[(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
}
let isCompare = 1, neverHasSideEffects = 1 in {
def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmpw $crD, $rA, $rB", IntCompare>;
+ "cmpw $crD, $rA, $rB", IIC_IntCompare>;
def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmplw $crD, $rA, $rB", IntCompare>;
+ "cmplw $crD, $rA, $rB", IIC_IntCompare>;
}
}
let PPC970_Unit = 3 in { // FPU Operations.
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
-// "fcmpo $crD, $fA, $fB", FPCompare>;
+// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
let isCompare = 1, neverHasSideEffects = 1 in {
def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiw", "$frD, $frB", FPGeneral,
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwz", "$frD, $frB", FPGeneral,
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
- "frsp", "$frD, $frB", FPGeneral,
+ "frsp", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
- "frin", "$frD, $frB", FPGeneral,
+ "frin", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
- "frin", "$frD, $frB", FPGeneral,
+ "frin", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
- "frip", "$frD, $frB", FPGeneral,
+ "frip", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fceil f64:$frB))]>;
defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
- "frip", "$frD, $frB", FPGeneral,
+ "frip", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fceil f32:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
- "friz", "$frD, $frB", FPGeneral,
+ "friz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (ftrunc f64:$frB))]>;
defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
- "friz", "$frD, $frB", FPGeneral,
+ "friz", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (ftrunc f32:$frB))]>;
- let Interpretation64Bit = 1 in
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
- "frim", "$frD, $frB", FPGeneral,
+ "frim", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (ffloor f64:$frB))]>;
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
- "frim", "$frD, $frB", FPGeneral,
+ "frim", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (ffloor f32:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
- "fsqrt", "$frD, $frB", FPSqrt,
+ "fsqrt", "$frD, $frB", IIC_FPSqrtD,
[(set f64:$frD, (fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
- "fsqrts", "$frD, $frB", FPSqrt,
+ "fsqrts", "$frD, $frB", IIC_FPSqrtS,
[(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1761,54 +1866,54 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
let neverHasSideEffects = 1 in
defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
- "fmr", "$frD, $frB", FPGeneral,
+ "fmr", "$frD, $frB", IIC_FPGeneral,
[]>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
- "fabs", "$frD, $frB", FPGeneral,
+ "fabs", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fabs f32:$frB))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
- "fabs", "$frD, $frB", FPGeneral,
+ "fabs", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fabs f64:$frB))]>;
defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
- "fnabs", "$frD, $frB", FPGeneral,
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fneg (fabs f32:$frB)))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
- "fnabs", "$frD, $frB", FPGeneral,
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fneg (fabs f64:$frB)))]>;
defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
- "fneg", "$frD, $frB", FPGeneral,
+ "fneg", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (fneg f32:$frB))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
- "fneg", "$frD, $frB", FPGeneral,
+ "fneg", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
[(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
[(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
- "fre", "$frD, $frB", FPGeneral,
+ "fre", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfre f64:$frB))]>;
defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
- "fres", "$frD, $frB", FPGeneral,
+ "fres", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfre f32:$frB))]>;
defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
- "frsqrte", "$frD, $frB", FPGeneral,
+ "frsqrte", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
- "frsqrtes", "$frD, $frB", FPGeneral,
+ "frsqrtes", "$frD, $frB", IIC_FPGeneral,
[(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
@@ -1816,57 +1921,67 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
//
let neverHasSideEffects = 1 in
def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
- "mcrf $BF, $BFA", BrMCR>,
+ "mcrf $BF, $BFA", IIC_BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+let isCommutable = 1 in {
def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crand $CRD, $CRA, $CRB", BrCR, []>;
+ "crand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>;
def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crnand $CRD, $CRA, $CRB", BrCR, []>;
+ "crnand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>;
def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "cror $CRD, $CRA, $CRB", BrCR, []>;
+ "cror $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>;
def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crxor $CRD, $CRA, $CRB", BrCR, []>;
+ "crxor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>;
def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crnor $CRD, $CRA, $CRB", BrCR, []>;
+ "crnor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>;
def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "creqv $CRD, $CRA, $CRB", BrCR, []>;
+ "creqv $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>;
+} // isCommutable
def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crandc $CRD, $CRA, $CRB", BrCR, []>;
+ "crandc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>;
def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "crorc $CRD, $CRA, $CRB", BrCR, []>;
+ "crorc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
- "creqv $dst, $dst, $dst", BrCR,
- []>;
+ "creqv $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 1)]>;
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
- "crxor $dst, $dst, $dst", BrCR,
- []>;
+ "crxor $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 0)]>;
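+// creqv with identical operands computes ~(x ^ x), i.e. 1, and crxor with
+// identical operands computes x ^ x, i.e. 0, so these set or clear the
+// target bit regardless of its prior value.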
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
- "creqv 6, 6, 6", BrCR,
+ "creqv 6, 6, 6", IIC_BrCR,
[(PPCcr6set)]>;
def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
- "crxor 6, 6, 6", BrCR,
+ "crxor 6, 6, 6", IIC_BrCR,
[(PPCcr6unset)]>;
}
}
@@ -1875,38 +1990,38 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
//
def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
- "mfspr $RT, $SPR", SprMFSPR>;
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
- "mtspr $SPR, $RT", SprMTSPR>;
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
+ "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>;
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
- "mfctr $rT", SprMFSPR>,
+ "mfctr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
let Pattern = [(int_ppc_mtctr i32:$rS)] in
def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", SprMTSPR>,
+ "mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
- "mtlr $rS", SprMTSPR>,
+ "mtlr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR] in {
def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
- "mflr $rT", SprMFSPR>,
+ "mflr $rT", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -1915,19 +2030,19 @@ let isCodeGenOnly = 1 in {
// like a GPR on the PPC970. As such, copies in and out have the same
// performance characteristics as an OR instruction.
def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
- "mtspr 256, $rS", IntGeneral>,
+ "mtspr 256, $rS", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
- "mfspr $rT, 256", IntGeneral>,
+ "mfspr $rT, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
(outs VRSAVERC:$reg), (ins gprc:$rS),
- "mtspr 256, $rS", IntGeneral>,
+ "mtspr 256, $rS", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
(ins VRSAVERC:$reg),
- "mfspr $rT, 256", IntGeneral>,
+ "mfspr $rT, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -1945,20 +2060,20 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
let neverHasSideEffects = 1 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
- "mtocrf $FXM, $ST", BrMCRX>,
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
- "mtcrf $FXM, $rS", BrMCRX>,
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", SprMFCR>,
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
- "mfcr $rT", SprMFCR>,
+ "mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // neverHasSideEffects = 1
@@ -1972,18 +2087,18 @@ let usesCustomInserter = 1, Uses = [RM] in {
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0, []>,
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0, []>,
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
- "mtfsf $FM, $rT", IntMTFSB0, []>,
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs $rT", IntMFFS,
+ "mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1991,59 +2106,68 @@ let Uses = [RM] in {
let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
-//
+let isCommutable = 1 in
defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "add", "$rT, $rA, $rB", IntSimple,
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+let isCodeGenOnly = 1 in
+def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "addc", "$rT, $rA, $rB", IntGeneral,
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
+
defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IntDivW,
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
[(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IntDivW,
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
[(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhw", "$rT, $rA, $rB", IntMulHW,
+ "mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhwu", "$rT, $rA, $rB", IntMulHWU,
+ "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mullw", "$rT, $rA, $rB", IntMulHW,
+ "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+} // isCommutable
defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subf", "$rT, $rA, $rB", IntGeneral,
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfc", "$rT, $rA, $rB", IntGeneral,
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
- "neg", "$rT, $rA", IntSimple,
+ "neg", "$rT, $rA", IIC_IntSimple,
[(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY] in {
+let isCommutable = 1 in
defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "adde", "$rT, $rA, $rB", IntGeneral,
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addme", "$rT, $rA", IntGeneral,
+ "addme", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, -1))]>;
defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addze", "$rT, $rA", IntGeneral,
+ "addze", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (adde i32:$rA, 0))]>;
defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfe", "$rT, $rA, $rB", IntGeneral,
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfme", "$rT, $rA", IntGeneral,
+ "subfme", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (sube -1, i32:$rA))]>;
defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfze", "$rT, $rA", IntGeneral,
+ "subfze", "$rT, $rA", IIC_IntGeneral,
[(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -2053,90 +2177,96 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
//
let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
let Uses = [RM] in {
+let isCommutable = 1 in {
defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
(fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
(fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
(fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
(fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
+} // isCommutable
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
// should use an FMRSD if the input comparison value really wants to be a float)
// and 4/8-byte forms for the result and operand types.
-let Interpretation64Bit = 1 in
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FSELD : AForm_1r<63, 23,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
- "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FSELS : AForm_1r<63, 23,
(outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
- "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
+ let isCommutable = 1 in {
defm FADD : AForm_2r<63, 21,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fadd", "$FRT, $FRA, $FRB", FPAddSub,
+ "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
[(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
defm FADDS : AForm_2r<59, 21,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fadds", "$FRT, $FRA, $FRB", FPGeneral,
+ "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ } // isCommutable
defm FDIV : AForm_2r<63, 18,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fdiv", "$FRT, $FRA, $FRB", FPDivD,
+ "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
[(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
defm FDIVS : AForm_2r<59, 18,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fdivs", "$FRT, $FRA, $FRB", FPDivS,
+ "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
[(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ let isCommutable = 1 in {
defm FMUL : AForm_3r<63, 25,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
- "fmul", "$FRT, $FRA, $FRC", FPFused,
+ "fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
[(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
defm FMULS : AForm_3r<59, 25,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
- "fmuls", "$FRT, $FRA, $FRC", FPGeneral,
+ "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
[(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ } // isCommutable
defm FSUB : AForm_2r<63, 20,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
- "fsub", "$FRT, $FRA, $FRB", FPAddSub,
+ "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
[(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
defm FSUBS : AForm_2r<59, 20,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
- "fsubs", "$FRT, $FRA, $FRB", FPGeneral,
+ "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
@@ -2146,7 +2276,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
(outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IntGeneral,
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
[]>;
}
@@ -2157,24 +2287,24 @@ let isCommutable = 1 in {
// RLWIMI can be commuted if the rotate amount is zero.
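+// (With a zero rotate the result is (rS & m) | (rSi & ~m); swapping the two
+// register operands and complementing the mask yields the same value, which
+// is what makes the commute legal.)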
defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
(ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
- u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
- []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
- NoEncode<"$rSi">;
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
}
let BaseName = "rlwinm" in {
def RLWINM : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
def RLWINMo : MForm_2<21,
(outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
}
defm RLWNM : MForm_2r<23, (outs gprc:$rA),
(ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
- "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
[]>;
}
} // neverHasSideEffects = 1
@@ -2188,8 +2318,10 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not i32:$in),
- (NOR $in, $in)>;
+def i32not : OutPatFrag<(ops node:$in),
+ (NOR $in, $in)>;
+def : Pat<(not i32:$in),
+ (i32not $in)>;
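+// For example, with $in = 0x0000FFFF, (NOR $in, $in) computes
+// ~(0x0000FFFF | 0x0000FFFF) = 0xFFFF0000, i.e. the bitwise complement.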
// ADD an arbitrary immediate.
def : Pat<(add i32:$in, imm:$imm),
@@ -2260,6 +2392,29 @@ def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
+// Support for thread-local storage.
+def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+ [(set i32:$rD, (PPCppc32GOT))]>;
+
+def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
+ (ADD4TLS $in, tglobaltlsaddr:$g)>;
+
+// Support for position-independent code.
+def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get the Global (GOT) Base Register offset from the word immediately
+// preceding the function label.
+def GetGBRO: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#GetGBRO", []>;
+// Update the Global (GOT) Base Register with the above offset.
+def UpdateGBR: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
@@ -2315,52 +2470,561 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
+
+def crnot : OutPatFrag<(ops node:$in),
+ (CRNOR $in, $in)>;
+def : Pat<(not i1:$in),
+ (crnot $in)>;
+
+// Patterns for arithmetic i1 operations.
+def : Pat<(add i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(sub i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(mul i1:$a, i1:$b),
+ (CRAND $a, $b)>;
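+// These identities hold because i1 arithmetic is arithmetic modulo 2:
+// a + b and a - b both reduce to XOR, and a * b reduces to AND.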
+
+// We're sometimes asked to materialize i1 -1, which is just 1 in this case
+// (-1 is used to mean all bits set).
+def : Pat<(i1 -1), (CRSET)>;
+
+// i1 extensions, implemented in terms of isel.
+def : Pat<(i32 (zext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i32 (sext i1:$in)),
+ (SELECT_I4 $in, (LI -1), (LI 0))>;
+
+def : Pat<(i64 (zext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+def : Pat<(i64 (sext i1:$in)),
+ (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
+
+// FIXME: We should choose either a zext or a sext based on other constants
+// already around.
+def : Pat<(i32 (anyext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i64 (anyext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+
+// Match setcc on i1 variables.
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
+ (CREQV $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
+ (CRXOR $s1, $s2)>;
+
+// Match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
+// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for
+// floating-point types.
+
+multiclass CRNotPat<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i32 (zext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+ def : Pat<(i32 (sext pattern)),
+ (SELECT_I4 result, (LI 0), (LI -1))>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i64 (zext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+ def : Pat<(i64 (sext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 -1))>;
+
+ // FIXME: We should choose either a zext or a sext based on other constants
+ // already around.
+ def : Pat<(i32 (anyext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+
+ def : Pat<(i64 (anyext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+}
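+// Each CRNotPat use thus emits the inverted condition as (crnot result),
+// matches (not pattern) directly to the uninverted result, and folds the
+// inversion into the extensions by swapping the select arms.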
+
+// FIXME: Because of what seems like a bug in TableGen's type-inference code,
+// we need to write imm:$imm in the output patterns below, not just $imm, or
+// else the resulting matcher will not correctly add the immediate operand
+// (making it a register operand instead).
+
+// Extended SETCC patterns.
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag,
+ OutPatFrag rfrag, OutPatFrag rfrag8> {
+ def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+
+ def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+}
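+// Each instance covers the zext and anyext forms for all four combinations
+// of 32- and 64-bit compare input and extension result, using sub_32
+// subregister operations to move between register widths.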
+
+// Note that we do all inversions below with i(32|64)not, instead of using
+// (xori x, 1) because on the A2 nor has single-cycle latency while xori
+// has 2-cycle latency.
+
+defm : ExtSetCCPat<SETEQ,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (CNTLZW $in), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (CNTLZD $in), 58, 63)> >;
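+// The SETEQ forms work because cntlz returns the operand width (32 or 64)
+// exactly when the input is zero, and no smaller count reaches that power
+// of two, so extracting the count's top bit yields the "is zero" flag:
+//   cntlzw r0, r3             ; r0 == 32 iff r3 == 0
+//   rlwinm r3, r0, 27, 31, 31 ; r3 = (r0 >> 5) & 1 = (r3 == 0)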
+
+defm : ExtSetCCPat<SETNE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
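+// For SETNE the count is complemented first, which flips the extracted bit.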
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
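+// SETLT and SETGE against zero just extract the sign bit, of the input
+// itself or of its complement, respectively.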
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >;
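+// (neg x) & ~x has its sign bit set exactly when x > 0, so extracting that
+// bit gives the SETGT result.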
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >;
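+// Likewise, x | ~(neg x) is negative exactly when x <= 0 (for x == 0,
+// neg x is 0 and its complement is all ones).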
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
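+// For the compare-against-(-1) forms, the sign bit of ($in & ($in + 1)) is
+// set iff both $in and $in + 1 are negative, i.e. iff $in <= -2, which is
+// exactly $in < -1; the NAND variants below give the complement for SETGE.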
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+// SETCC for i32.
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+// By default (and necessarily for non-equality comparisons), the code
+// materializes the wide constant and then compares against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpw cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmplwi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
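+// (Worked example: for the constant 0x12345678 -- the 4660/22136 pair above --
+// HI16 gives 0x1234 and LO16 gives 0x5678; the xoris cancels a matching high
+// halfword, so EQ is set iff $s1 == 0x12345678.)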
+
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// SETCC for i64.
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+// By default (and necessarily for non-equality comparisons), the code
+// materializes the wide constant and then compares against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpd cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmpldi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// SETCC for f32.
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// SETCC for f64.
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// Match select on i1 variables:
+def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
+ (CROR (CRAND $cond , $tval),
+ (CRAND (crnot $cond), $fval))>;
+
+// Match selectcc on i1 variables:
+// select (lhs == rhs), tval, fval is:
+// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
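+// For i1 values, "less than" means lhs == 0 && rhs == 1, so the strict and
+// non-strict orderings reduce to and-with-complement (CRANDC) and
+// or-with-complement (CRORC) of the operands.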
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+ (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+ (CRAND (CRORC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+ (CROR (CRAND (CRORC $rhs, $lhs), $tval),
+ (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $tval),
+ (CRAND (CRXOR $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+ (CROR (CRAND (CRORC $lhs, $rhs), $tval),
+ (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+ (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+ (CRAND (CRORC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $fval),
+ (CRAND (CRXOR $lhs, $rhs), $tval))>;
+
+// Match selectcc on i1 variables with non-i1 output.
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+ (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+ (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
+ (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+ (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+ (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
+ (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+ (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+ (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
+ (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+ (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+ (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
+ (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+ (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+ (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
+ (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+ (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+ (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
+ (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+let usesCustomInserter = 1 in {
+def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_EQ_BIT",
+ [(set i1:$dst, (trunc (not i32:$in)))]>;
+def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_GT_BIT",
+ [(set i1:$dst, (trunc i32:$in))]>;
+
+def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_EQ_BIT8",
+ [(set i1:$dst, (trunc (not i64:$in)))]>;
+def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_GT_BIT8",
+ [(set i1:$dst, (trunc i64:$in))]>;
+}
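+// These pseudos are expanded by the custom inserter to a record-form
+// "andi. rT, rS, 1": that sets CR0's GT bit iff the low bit of the input is 1
+// and the EQ bit iff it is 0, giving trunc and its negation directly.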
+
+def : Pat<(i1 (not (trunc i32:$in))),
+ (ANDIo_1_EQ_BIT $in)>;
+def : Pat<(i1 (not (trunc i64:$in))),
+ (ANDIo_1_EQ_BIT8 $in)>;
//===----------------------------------------------------------------------===//
// PowerPC Instructions used for assembler/disassembler only
//
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
- "isync", SprISYNC, []>;
+ "isync", IIC_SprISYNC, []>;
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
- "icbi $src", LdStICBI, []>;
+ "icbi $src", IIC_LdStICBI, []>;
def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
- "eieio", LdStLoad, []>;
+ "eieio", IIC_LdStLoad, []>;
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
- "wait $L", LdStLoad, []>;
+ "wait $L", IIC_LdStLoad, []>;
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
- "mtmsr $RS, $L", SprMTMSR>;
+ "mtmsr $RS, $L", IIC_SprMTMSR>;
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
- "mfmsr $RT", SprMFMSR, []>;
+ "mfmsr $RT", IIC_SprMFMSR, []>;
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
- "mtmsrd $RS, $L", SprMTMSRD>;
+ "mtmsrd $RS, $L", IIC_SprMTMSRD>;
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
- "slbie $RB", SprSLBIE, []>;
+ "slbie $RB", IIC_SprSLBIE, []>;
def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
- "slbmte $RS, $RB", SprSLBMTE, []>;
+ "slbmte $RS, $RB", IIC_SprSLBMTE, []>;
def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
- "slbmfee $RT, $RB", SprSLBMFEE, []>;
+ "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
-def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
- "tlbsync", SprTLBSYNC, []>;
+ "tlbsync", IIC_SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
- "tlbiel $RB", SprTLBIEL, []>;
+ "tlbiel $RB", IIC_SprTLBIEL, []>;
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
- "tlbie $RB,$RS", SprTLBIE, []>;
+ "tlbie $RB,$RS", IIC_SprTLBIE, []>;
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
@@ -2576,19 +3240,19 @@ let PPC970_Unit = 7 in {
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCLR : XLForm_2<19, 16, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclr $bo, $bi, $bh", BrB, []>;
+ "bclr $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCLRL : XLForm_2<19, 16, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclrl $bo, $bi, $bh", BrB, []>;
+ "bclrl $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCCTR : XLForm_2<19, 528, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctr $bo, $bi, $bh", BrB, []>;
+ "bcctr $bo, $bi, $bh", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCCTRL : XLForm_2<19, 528, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctrl $bo, $bi, $bh", BrB, []>;
+ "bcctrl $bo, $bi, $bh", IIC_BrB, []>;
}
def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
@@ -2631,14 +3295,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
(BCCA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lr"#pm#" $cc",
- (BCLR bibo, crrc:$cc)>;
+ (BCCLR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lr"#pm,
- (BCLR bibo, CR0)>;
+ (BCCLR bibo, CR0)>;
def : InstAlias<"b"#name#"ctr"#pm#" $cc",
- (BCCTR bibo, crrc:$cc)>;
+ (BCCCTR bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctr"#pm,
- (BCCTR bibo, CR0)>;
+ (BCCCTR bibo, CR0)>;
def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
(BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
@@ -2651,14 +3315,14 @@ multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
(BCCLA bibo, CR0, abscondbrtarget:$dst)>;
def : InstAlias<"b"#name#"lrl"#pm#" $cc",
- (BCLRL bibo, crrc:$cc)>;
+ (BCCLRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"lrl"#pm,
- (BCLRL bibo, CR0)>;
+ (BCCLRL bibo, CR0)>;
def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
- (BCCTRL bibo, crrc:$cc)>;
+ (BCCCTRL bibo, crrc:$cc)>;
def : InstAlias<"b"#name#"ctrl"#pm,
- (BCCTRL bibo, CR0)>;
+ (BCCCTRL bibo, CR0)>;
}
multiclass BranchExtendedMnemonic<string name, int bibo> {
defm : BranchExtendedMnemonicPM<name, "", bibo>;
@@ -2682,18 +3346,18 @@ def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
-def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
-def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
-def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
-def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
multiclass TrapExtendedMnemonic<string name, int to> {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 000000000000..49bcc4876d33
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,816 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT;
+ }
+}
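+// (For illustration: "defm XVCMPEQDP : XX3Form_Rcr<...>" below produces both
+// XVCMPEQDP, "xvcmpeqdp ...", and the record form XVCMPEQDPo,
+// "xvcmpeqdp. ...", which implicitly defines CR6.)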
+
+def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ def LXSDX : XForm_1<31, 588,
+ (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVD2X : XForm_1<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVDSX : XForm_1<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+
+ def LXVW4X : XForm_1<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ }
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ }
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDADP" in {
+ let isCommutable = 1 in
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
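+ // A-form vs. M-form: xsmaddadp overwrites the addend (XT = XA*XB + XT) while
+ // xsmaddmdp overwrites a multiplicand (XT = XA*XT + XB); keeping both forms,
+ // related via AltVSXFMARel, lets a later pass pick whichever avoids a copy.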
+
+ let BaseName = "XSMSUBADP" in {
+ let isCommutable = 1 in
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDADP" in {
+ let isCommutable = 1 in
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBADP" in {
+ let isCommutable = 1 in
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDADP" in {
+ let isCommutable = 1 in
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDASP" in {
+ let isCommutable = 1 in
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBADP" in {
+ let isCommutable = 1 in
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBASP" in {
+ let isCommutable = 1 in
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDADP" in {
+ let isCommutable = 1 in
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDASP" in {
+ let isCommutable = 1 in
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBADP" in {
+ let isCommutable = 1 in
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBASP" in {
+ let isCommutable = 1 in
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsfrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ } // isCommutable
+} // Uses = [RM]
+
+ // Logical Instructions
+ let isCommutable = 1 in
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA,
+ (vnot_ppc v4i32:$XB)))]>;
+ let isCommutable = 1 in {
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA,
+ v4i32:$XB)))]>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XXLORf: XX3Form<60, 146,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
+ } // isCommutable
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // neverHasSideEffects
+} // AddedComplexity
+
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
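+// In these aliases DM is a two-bit doubleword selector: the high bit picks
+// XA's doubleword for the result's first half and the low bit picks XB's for
+// the second, so DM=0 merges the high halves, DM=3 the low halves, and DM=2
+// with XA == XB swaps the two halves.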
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
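+// (Element 0 already sits in the f64 subregister; element 1 is brought there
+// by the doubleword swap, XXPERMDI with DM=2, as in xxswapd above.)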
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
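+// The nmsub instructions compute -(XA*XB - XTi) = XTi - XA*XB, so tying the
+// addend b as XTi implements -a*c + b for either placement of the fneg.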
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+// sign extension patterns
+// To extend "in place" from v2i32 to v2i64, we have input data like:
+// | undef | i32 | undef | i32 |
+// but xvcvsxwdp expects the input in big-endian format:
+// | i32 | undef | i32 | undef |
+// so we need to shift everything to the left by one i32 (word) before
+// the conversion.
+def : Pat<(sext_inreg v2i64:$C, v2i32),
+ (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
+def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
+ (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+
+} // AddedComplexity
+} // HasVSX
+
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 5e3a48d8bbdb..e5f113a0c030 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -11,10 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -22,8 +21,15 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
+ : Subtarget(STI), is64Bit(STI.isPPC64()) {
+ useGOT = 0;
+}
+
#define BUILD_ADDIS(RD,RS,IMM16) \
((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
#define BUILD_ORI(RD,RS,UIMM16) \
@@ -214,6 +220,10 @@ asm(
".text\n"
".align 2\n"
".globl PPC64CompilationCallback\n"
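+// Under the ELFv2 ABI (_CALL_ELF == 2) there are no function descriptors, so
+// the callback symbol is defined directly on its code; under ELFv1 it names
+// an .opd descriptor entry that points at .L.PPC64CompilationCallback.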
+#if _CALL_ELF == 2
+ ".type PPC64CompilationCallback,@function\n"
+"PPC64CompilationCallback:\n"
+#else
".section \".opd\",\"aw\",@progbits\n"
".align 3\n"
"PPC64CompilationCallback:\n"
@@ -223,6 +233,7 @@ asm(
".align 4\n"
".type PPC64CompilationCallback,@function\n"
".L.PPC64CompilationCallback:\n"
+#endif
# else
asm(
".text\n"
@@ -387,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ } else if (Subtarget.isDarwinABI()){
JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 46d4a08eb687..b6b37ffb852b 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -18,32 +18,29 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class PPCTargetMachine;
+class PPCSubtarget;
+class PPCJITInfo : public TargetJITInfo {
+protected:
+ PPCSubtarget &Subtarget;
+ bool is64Bit;
- class PPCJITInfo : public TargetJITInfo {
- protected:
- PPCTargetMachine &TM;
- bool is64Bit;
- public:
- PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
- useGOT = 0;
- is64Bit = tmIs64Bit;
- }
+public:
+ PPCJITInfo(PPCSubtarget &STI);
- virtual StubLayout getStubLayout();
- virtual void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE);
- virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
- virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase);
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- virtual void replaceMachineCodeForFunction(void *Old, void *New);
- };
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
+ unsigned char *GOTBase) override;
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
+};
}
#endif
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f61c8bf0216e..668041371780 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,17 +13,21 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -32,35 +36,42 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ const TargetMachine &TM = AP.TM;
+ Mangler *Mang = AP.Mang;
+ const DataLayout *DL = TM.getDataLayout();
MCContext &Ctx = AP.OutContext;
+ bool isDarwin = TM.getSubtarget<PPCSubtarget>().isDarwin();
SmallString<128> Name;
+ StringRef Suffix;
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+ if (isDarwin)
+ Suffix = "$stub";
+ } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ Suffix = "$non_lazy_ptr";
+
+ if (!Suffix.empty())
+ Name += DL->getPrivateGlobalPrefix();
+
+ unsigned PrefixLen = Name.size();
+
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
- Name += AP.MAI->getGlobalPrefix();
- Name += MO.getSymbolName();
- } else {
+ Mang->getNameWithPrefix(Name, MO.getSymbolName());
+ } else {
const GlobalValue *GV = MO.getGlobal();
- bool isImplicitlyPrivate = false;
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
- (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
- isImplicitlyPrivate = true;
-
- AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ TM.getNameWithPrefix(Name, GV, *Mang);
}
-
+
+ unsigned OrigLen = Name.size() - PrefixLen;
+
+ Name += Suffix;
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
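+ // (For example, a Darwin global "foo" carrying the NLP flag becomes the
+ // local symbol "L_foo$non_lazy_ptr", and OrigName recovers plain "_foo".)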
+
// If the target flags on the operand change the name of the symbol, do that
// before we return the symbol.
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
- Name += "$stub";
- const char *PGP = AP.MAI->getPrivateGlobalPrefix();
- const char *Prefix = "";
- if (!Name.startswith(PGP)) {
- // http://llvm.org/bugs/show_bug.cgi?id=15763
- // all stubs and lazy_ptrs should be local symbols, which need leading 'L'
- Prefix = PGP;
- }
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name));
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI(AP).getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -72,10 +83,9 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
- Name.erase(Name.end()-5, Name.end());
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
}
return Sym;
}
@@ -83,16 +93,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
// If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
// then add the suffix.
if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
- Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
-
MachineModuleInfoMachO &MachO = getMachOMMI(AP);
MachineModuleInfoImpl::StubValueTy &StubSym =
(MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym = MachineModuleInfoImpl::
StubValueTy(AP.getSymbol(MO.getGlobal()),
@@ -101,7 +108,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
return Sym;
}
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Sym;
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
@@ -132,6 +139,9 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
}
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+ RefKind = MCSymbolRefExpr::VK_PLT;
+
const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
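A minimal standalone sketch (plain C++, not the LLVM API) of the name assembly done by the rewritten GetSymbolFromOperand above: the symbol becomes [private prefix][mangled name][suffix], and the plain name is recovered afterwards by substring bookkeeping (PrefixLen/OrigLen) instead of the old erase(). The "L" prefix and all helper names below are illustrative assumptions, not part of the diff.

#include <cassert>
#include <string>

// Assemble [prefix][mangled][suffix]; OrigOut receives the plain mangled
// name, mirroring the OrigName substring computed in the hunk above.
std::string makeSymbolName(const std::string &Mangled, bool WantsStub,
                           bool WantsNonLazyPtr, std::string *OrigOut) {
  std::string Suffix;
  if (WantsStub)
    Suffix = "$stub";
  else if (WantsNonLazyPtr)
    Suffix = "$non_lazy_ptr";

  std::string Name;
  if (!Suffix.empty())
    Name += "L"; // assumed Darwin-style private-global prefix
  unsigned PrefixLen = Name.size();

  Name += Mangled;
  unsigned OrigLen = Name.size() - PrefixLen;
  Name += Suffix;

  if (OrigOut)
    *OrigOut = Name.substr(PrefixLen, OrigLen);
  return Name;
}

int main() {
  std::string Orig;
  std::string S = makeSymbolName("_foo", /*WantsStub=*/true,
                                 /*WantsNonLazyPtr=*/false, &Orig);
  assert(S == "L_foo$stub" && Orig == "_foo"); // stub is local, plain name kept
}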
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 6a0aec842be7..9da1b1b5c754 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,8 +8,16 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
void PPCFunctionInfo::anchor() { }
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+ const DataLayout *DL = MF.getTarget().getDataLayout();
+ return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+ Twine(MF.getFunctionNumber())+"$poff");
+}
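The new getPICOffsetSymbol() simply concatenates the private-global prefix, the function number, and "$poff". A sketch of the resulting label, assuming an ELF-style ".L" prefix (the prefix is an assumption here, not something the diff fixes):

#include <iostream>
#include <string>

// Shape of the label produced above; ".L" is assumed for illustration.
std::string picOffsetLabel(unsigned FunctionNumber,
                           const std::string &PrivatePrefix = ".L") {
  return PrivatePrefix + std::to_string(FunctionNumber) + "$poff";
}

int main() { std::cout << picOffsetLabel(3) << "\n"; } // prints ".L3$poff"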
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 33f843dfb432..9a2cec744274 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// 64-bit SVR4 ABI.
SmallVector<unsigned, 3> MustSaveCRs;
+ /// Hold onto our MachineFunction context.
+ MachineFunction &MF;
+
+  /// Whether this function uses the PIC base register.
+ bool UsesPICBase;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -109,7 +115,9 @@ public:
VarArgsStackOffset(0),
VarArgsNumGPR(0),
VarArgsNumFPR(0),
- CRSpillFrameIndex(0) {}
+ CRSpillFrameIndex(0),
+ MF(MF),
+      UsesPICBase(false) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -170,6 +178,11 @@ public:
const SmallVectorImpl<unsigned> &
getMustSaveCRs() const { return MustSaveCRs; }
void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+ void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+ bool usesPICBase() const { return UsesPICBase; }
+
+ MCSymbol *getPICOffsetSymbol() const;
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 19ccbfcdb169..9895ee6267aa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reginfo"
#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCFrameLowering.h"
@@ -27,7 +26,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -43,11 +41,13 @@
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "reginfo"
+
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -97,7 +97,7 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &PPC::GPRCRegClass;
}
-const uint16_t*
+const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
@@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (PPCFI->needsFP(MF))
Reserved.set(PPC::R31);
- if (hasBasePointer(MF))
+ if (hasBasePointer(MF)) {
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R29);
+ else
+ Reserved.set(PPC::R30);
+ }
+
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -230,12 +239,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
+ case PPC::VFRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
}
+const TargetRegisterClass*
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+ if (Subtarget.hasVSX()) {
+ // With VSX, we can inflate various sub-register classes to the full VSX
+ // register set.
+
+ if (RC == &PPC::F8RCRegClass)
+ return &PPC::VSFRCRegClass;
+ else if (RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+}
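A plain-C++ restatement of the inflation rule above (register-class names as strings purely for illustration): with VSX available, scalar f64 values may be allocated anywhere in the 64-entry VSFRC set and Altivec vectors anywhere in VSRC.

#include <cassert>
#include <string>

// Map a class to the largest legal superclass under the rule above.
std::string largestSuperClass(const std::string &RC, bool HasVSX) {
  if (HasVSX) {
    if (RC == "F8RC") return "VSFRC";
    if (RC == "VRRC") return "VSRC";
  }
  return RC;
}

int main() {
  assert(largestSuperClass("F8RC", true) == "VSFRC");
  assert(largestSuperClass("F8RC", false) == "F8RC"); // no VSX, no inflation
}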
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -452,6 +482,127 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
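The chain of comparisons above maps each of the 32 CR bit registers to its owning 4-bit field. Numerically this is just integer division of the bit's encoding by 4, as the sketch below shows; the real code matches named registers because it must return a register id, not a field number.

#include <cassert>

// CR bit encodings are 4*field + {LT=0, GT=1, EQ=2, UN=3}.
unsigned crFieldFromBitEncoding(unsigned BitEncoding) {
  return BitEncoding / 4;
}

int main() {
  assert(crFieldFromBitEncoding(10) == 2); // CR2EQ = 4*2 + 2 -> field CR2
  assert(crFieldFromBitEncoding(31) == 7); // CR7UN -> field CR7
}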
+
+void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
+ getCRFromCRBit(SrcReg))
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(getCRFromCRBit(SrcReg));
+
+ // If the saved register wasn't CR0LT, shift the bits left so that the bit to
+ // store is the first one. Mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
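A worked model (plain C++) of the rlwinm emitted above: rotating left by the bit's encoding moves the spilled CR bit into big-endian bit 0, and the mask begin = 0, end = 0 keeps only that bit. The CR2EQ example assumes the usual 4*field + bit encoding, i.e. bit 10.

#include <cassert>
#include <cstdint>

uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Models "rlwinm rA, rA, ShiftBits, 0, 0": rotate left by the bit's
// encoding, then keep only big-endian bit 0 (0x80000000).
uint32_t spillValue(uint32_t CRContents, unsigned ShiftBits) {
  return rotl32(CRContents, ShiftBits) & 0x80000000u;
}

int main() {
  unsigned ShiftBits = 10;              // e.g. CR2EQ under 4*field + bit
  uint32_t CR = 1u << (31 - ShiftBits); // only that bit set
  assert(spillValue(CR, ShiftBits) == 0x80000000u);
}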
+
+void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CRBIT does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
+
+ unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
+ .addReg(getCRFromCRBit(DestReg));
+
+ unsigned ShiftBits = getEncodingValue(DestReg);
+ // rlwimi r11, r10, 32-ShiftBits, ..., ...
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
+ .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32-ShiftBits : 0)
+ .addImm(ShiftBits).addImm(ShiftBits);
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
+ getCRFromCRBit(DestReg))
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
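The restore is the mirror image, modeled below: rotate the reloaded word left by 32-ShiftBits (that is, right by ShiftBits) and insert exactly one bit into the value read back with mfocrf, which is what the rlwimi with mask begin = end = ShiftBits does.

#include <cassert>
#include <cstdint>

uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Models "rlwimi RegO, Reg, 32-ShiftBits, ShiftBits, ShiftBits": insert
// big-endian bit ShiftBits of the rotated reload into the current field.
uint32_t restoreField(uint32_t CurrentCR, uint32_t Reloaded,
                      unsigned ShiftBits) {
  uint32_t Mask = 1u << (31 - ShiftBits); // big-endian bit ShiftBits
  uint32_t Rot = rotl32(Reloaded, (32 - ShiftBits) & 31);
  return (CurrentCR & ~Mask) | (Rot & Mask);
}

int main() {
  // The spilled bit sits in big-endian bit 0; restoring with ShiftBits == 10
  // must set only big-endian bit 10 and leave everything else untouched.
  assert(restoreField(0, 0x80000000u, 10) == 1u << (31 - 10));
}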
+
void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -595,6 +746,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_CR) {
lowerCRRestore(II, FrameIndex);
return;
+ } else if (OpC == PPC::SPILL_CRBIT) {
+ lowerCRBitSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CRBIT) {
+ lowerCRBitRestore(II, FrameIndex);
+ return;
} else if (OpC == PPC::SPILL_VRSAVE) {
lowerVRSAVESpilling(II, FrameIndex);
return;
@@ -695,7 +852,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- return Subtarget.isPPC64() ? PPC::X30 : PPC::R30;
+ if (Subtarget.isPPC64())
+ return PPC::X30;
+
+ if (Subtarget.isSVR4ABI() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ return PPC::R29;
+
+ return PPC::R30;
}
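The selection above, restated (this sketch assumes hasBasePointer() has already returned true): 64-bit code keeps X30; 32-bit SVR4 PIC code moves the base pointer to R29 because R30 now holds the PIC base; everything else stays on R30.

#include <cassert>

enum BaseReg { X30, R29, R30 };

BaseReg baseRegister(bool IsPPC64, bool IsSVR4ABI, bool IsPIC) {
  if (IsPPC64) return X30;
  if (IsSVR4ABI && IsPIC) return R29; // R30 is the PIC base here
  return R30;
}

int main() {
  assert(baseRegister(false, true, true) == R29);
  assert(baseRegister(false, true, false) == R30);
  assert(baseRegister(true, true, true) == X30);
}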
bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
@@ -737,16 +901,6 @@ bool PPCRegisterInfo::
needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
assert(Offset < 0 && "Local offset must be negative");
- unsigned FIOperandNum = 0;
- while (!MI->getOperand(FIOperandNum).isFI()) {
- ++FIOperandNum;
- assert(FIOperandNum < MI->getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
- Offset += MI->getOperand(OffsetOperandNo).getImm();
-
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
// based on the size of the local frame and some conservative assumptions
@@ -812,11 +966,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
}
-void
-PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
- unsigned BaseReg, int64_t Offset) const {
- MachineInstr &MI = *I;
-
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI.getOperand(FIOperandNum).isFI()) {
++FIOperandNum;
@@ -828,10 +979,28 @@ PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
Offset += MI.getOperand(OffsetOperandNo).getImm();
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index dd3bb405dac3..13a35f6309d4 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
-#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
+#include "llvm/ADT/DenseMap.h"
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -34,33 +34,37 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
+ MachineFunction &MF) const override;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const MCPhysReg *
+  getCalleeSavedRegs(const MachineFunction* MF = nullptr) const override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
/// We require the register scavenger.
- bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
return true;
}
@@ -69,34 +73,39 @@ public:
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
+ void lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerVRSAVERestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const;
+ int &FrameIdx) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
// Support for virtual base registers.
- bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
- int64_t Offset) const;
- void resolveFrameIndex(MachineBasicBlock::iterator I,
- unsigned BaseReg, int64_t Offset) const;
- bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+ int64_t Offset) const override;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 43663ce013e9..b3d145b2cc49 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
}
@@ -47,9 +49,36 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// VR - One of the 32 128-bit vector registers
-class VR<bits<5> num, string n> : PPCReg<n> {
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
+ let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 0;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
}
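The three new register classes above share one 6-bit encoding scheme: bit 5 of HWEncoding selects which half of the 64-entry VSX file a register lives in. vs0-vs31 alias the FPRs (bit 5 clear) and vs32-vs63 alias the vector registers (bit 5 set), as this sketch computes.

#include <cassert>

// Bit 5 of the HWEncoding selects the half of the 64-entry VSX file.
unsigned vsxEncoding(unsigned SubRegNum, bool OverlapsVectorRegs) {
  return (OverlapsVectorRegs ? 32u : 0u) | (SubRegNum & 31u);
}

int main() {
  assert(vsxEncoding(5, /*OverlapsVectorRegs=*/false) == 5);  // VSL5 = vs5 (f5)
  assert(vsxEncoding(5, /*OverlapsVectorRegs=*/true) == 37);  // VSH5 = vs37 (v5)
}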
// CR - One of the 8 4-bit condition registers
@@ -80,12 +109,27 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+ def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
// Vector registers
foreach Index = 0-31 in {
- def V#Index : VR<Index, "v"#Index>,
+ def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// VSX registers
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">;
@@ -211,17 +255,39 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
-def CRBITRC : RegisterClass<"PPC", [i32], 32,
- (add CR0LT, CR0GT, CR0EQ, CR0UN,
- CR1LT, CR1GT, CR1EQ, CR1UN,
- CR2LT, CR2GT, CR2EQ, CR2UN,
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSLRC, VSHRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+ (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+ VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+ VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+ VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+ VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
+def CRBITRC : RegisterClass<"PPC", [i1], 32,
+ (add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
CR4LT, CR4GT, CR4EQ, CR4UN,
CR5LT, CR5GT, CR5EQ, CR5UN,
CR6LT, CR6GT, CR6EQ, CR6UN,
- CR7LT, CR7GT, CR7EQ, CR7UN)>
-{
- let CopyCost = -1;
+ CR7LT, CR7GT, CR7EQ, CR7UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR0LT, CR0GT, CR0EQ, CR0UN)> {
+ let Size = 32;
}
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 92ba69c2c6b8..1221d4149996 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -8,114 +8,106 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Functional units across PowerPC chips sets
-//
-def BPU : FuncUnit; // Branch unit
-def SLU : FuncUnit; // Store/load unit
-def SRU : FuncUnit; // special register unit
-def IU1 : FuncUnit; // integer unit 1 (simple)
-def IU2 : FuncUnit; // integer unit 2 (complex)
-def FPU1 : FuncUnit; // floating point unit 1
-def FPU2 : FuncUnit; // floating point unit 2
-def VPU : FuncUnit; // vector permutation unit
-def VIU1 : FuncUnit; // vector integer unit 1 (simple)
-def VIU2 : FuncUnit; // vector integer unit 2 (complex)
-def VFPU : FuncUnit; // vector floating point unit
-
-//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for PowerPC
//
-def IntSimple : InstrItinClass;
-def IntGeneral : InstrItinClass;
-def IntCompare : InstrItinClass;
-def IntDivD : InstrItinClass;
-def IntDivW : InstrItinClass;
-def IntMFFS : InstrItinClass;
-def IntMFVSCR : InstrItinClass;
-def IntMTFSB0 : InstrItinClass;
-def IntMTSRD : InstrItinClass;
-def IntMulHD : InstrItinClass;
-def IntMulHW : InstrItinClass;
-def IntMulHWU : InstrItinClass;
-def IntMulLI : InstrItinClass;
-def IntRFID : InstrItinClass;
-def IntRotateD : InstrItinClass;
-def IntRotateDI : InstrItinClass;
-def IntRotate : InstrItinClass;
-def IntShift : InstrItinClass;
-def IntTrapD : InstrItinClass;
-def IntTrapW : InstrItinClass;
-def BrB : InstrItinClass;
-def BrCR : InstrItinClass;
-def BrMCR : InstrItinClass;
-def BrMCRX : InstrItinClass;
-def LdStDCBA : InstrItinClass;
-def LdStDCBF : InstrItinClass;
-def LdStDCBI : InstrItinClass;
-def LdStLoad : InstrItinClass;
-def LdStLoadUpd : InstrItinClass;
-def LdStStore : InstrItinClass;
-def LdStStoreUpd : InstrItinClass;
-def LdStDSS : InstrItinClass;
-def LdStICBI : InstrItinClass;
-def LdStLD : InstrItinClass;
-def LdStLDU : InstrItinClass;
-def LdStLDARX : InstrItinClass;
-def LdStLFD : InstrItinClass;
-def LdStLFDU : InstrItinClass;
-def LdStLHA : InstrItinClass;
-def LdStLHAU : InstrItinClass;
-def LdStLMW : InstrItinClass;
-def LdStLVecX : InstrItinClass;
-def LdStLWA : InstrItinClass;
-def LdStLWARX : InstrItinClass;
-def LdStSLBIA : InstrItinClass;
-def LdStSLBIE : InstrItinClass;
-def LdStSTD : InstrItinClass;
-def LdStSTDCX : InstrItinClass;
-def LdStSTDU : InstrItinClass;
-def LdStSTFD : InstrItinClass;
-def LdStSTFDU : InstrItinClass;
-def LdStSTVEBX : InstrItinClass;
-def LdStSTWCX : InstrItinClass;
-def LdStSync : InstrItinClass;
-def SprISYNC : InstrItinClass;
-def SprMFSR : InstrItinClass;
-def SprMTMSR : InstrItinClass;
-def SprMTSR : InstrItinClass;
-def SprTLBSYNC : InstrItinClass;
-def SprMFCR : InstrItinClass;
-def SprMFMSR : InstrItinClass;
-def SprMFSPR : InstrItinClass;
-def SprMFTB : InstrItinClass;
-def SprMTSPR : InstrItinClass;
-def SprMTSRIN : InstrItinClass;
-def SprRFI : InstrItinClass;
-def SprSC : InstrItinClass;
-def FPGeneral : InstrItinClass;
-def FPAddSub : InstrItinClass;
-def FPCompare : InstrItinClass;
-def FPDivD : InstrItinClass;
-def FPDivS : InstrItinClass;
-def FPFused : InstrItinClass;
-def FPRes : InstrItinClass;
-def FPSqrt : InstrItinClass;
-def VecGeneral : InstrItinClass;
-def VecFP : InstrItinClass;
-def VecFPCompare : InstrItinClass;
-def VecComplex : InstrItinClass;
-def VecPerm : InstrItinClass;
-def VecFPRound : InstrItinClass;
-def VecVSL : InstrItinClass;
-def VecVSR : InstrItinClass;
-def SprMTMSRD : InstrItinClass;
-def SprSLIE : InstrItinClass;
-def SprSLBIE : InstrItinClass;
-def SprSLBMTE : InstrItinClass;
-def SprSLBMFEE : InstrItinClass;
-def SprSLBIA : InstrItinClass;
-def SprTLBIEL : InstrItinClass;
-def SprTLBIE : InstrItinClass;
+def IIC_IntSimple : InstrItinClass;
+def IIC_IntGeneral : InstrItinClass;
+def IIC_IntCompare : InstrItinClass;
+def IIC_IntDivD : InstrItinClass;
+def IIC_IntDivW : InstrItinClass;
+def IIC_IntMFFS : InstrItinClass;
+def IIC_IntMFVSCR : InstrItinClass;
+def IIC_IntMTFSB0 : InstrItinClass;
+def IIC_IntMTSRD : InstrItinClass;
+def IIC_IntMulHD : InstrItinClass;
+def IIC_IntMulHW : InstrItinClass;
+def IIC_IntMulHWU : InstrItinClass;
+def IIC_IntMulLI : InstrItinClass;
+def IIC_IntRFID : InstrItinClass;
+def IIC_IntRotateD : InstrItinClass;
+def IIC_IntRotateDI : InstrItinClass;
+def IIC_IntRotate : InstrItinClass;
+def IIC_IntShift : InstrItinClass;
+def IIC_IntTrapD : InstrItinClass;
+def IIC_IntTrapW : InstrItinClass;
+def IIC_BrB : InstrItinClass;
+def IIC_BrCR : InstrItinClass;
+def IIC_BrMCR : InstrItinClass;
+def IIC_BrMCRX : InstrItinClass;
+def IIC_LdStDCBA : InstrItinClass;
+def IIC_LdStDCBF : InstrItinClass;
+def IIC_LdStDCBI : InstrItinClass;
+def IIC_LdStLoad : InstrItinClass;
+def IIC_LdStLoadUpd : InstrItinClass;
+def IIC_LdStLoadUpdX : InstrItinClass;
+def IIC_LdStStore : InstrItinClass;
+def IIC_LdStStoreUpd : InstrItinClass;
+def IIC_LdStDSS : InstrItinClass;
+def IIC_LdStICBI : InstrItinClass;
+def IIC_LdStLD : InstrItinClass;
+def IIC_LdStLDU : InstrItinClass;
+def IIC_LdStLDUX : InstrItinClass;
+def IIC_LdStLDARX : InstrItinClass;
+def IIC_LdStLFD : InstrItinClass;
+def IIC_LdStLFDU : InstrItinClass;
+def IIC_LdStLFDUX : InstrItinClass;
+def IIC_LdStLHA : InstrItinClass;
+def IIC_LdStLHAU : InstrItinClass;
+def IIC_LdStLHAUX : InstrItinClass;
+def IIC_LdStLMW : InstrItinClass;
+def IIC_LdStLVecX : InstrItinClass;
+def IIC_LdStLWA : InstrItinClass;
+def IIC_LdStLWARX : InstrItinClass;
+def IIC_LdStSLBIA : InstrItinClass;
+def IIC_LdStSLBIE : InstrItinClass;
+def IIC_LdStSTD : InstrItinClass;
+def IIC_LdStSTDCX : InstrItinClass;
+def IIC_LdStSTDU : InstrItinClass;
+def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTFD : InstrItinClass;
+def IIC_LdStSTFDU : InstrItinClass;
+def IIC_LdStSTVEBX : InstrItinClass;
+def IIC_LdStSTWCX : InstrItinClass;
+def IIC_LdStSync : InstrItinClass;
+def IIC_SprISYNC : InstrItinClass;
+def IIC_SprMFSR : InstrItinClass;
+def IIC_SprMTMSR : InstrItinClass;
+def IIC_SprMTSR : InstrItinClass;
+def IIC_SprTLBSYNC : InstrItinClass;
+def IIC_SprMFCR : InstrItinClass;
+def IIC_SprMFCRF : InstrItinClass;
+def IIC_SprMFMSR : InstrItinClass;
+def IIC_SprMFSPR : InstrItinClass;
+def IIC_SprMFTB : InstrItinClass;
+def IIC_SprMTSPR : InstrItinClass;
+def IIC_SprMTSRIN : InstrItinClass;
+def IIC_SprRFI : InstrItinClass;
+def IIC_SprSC : InstrItinClass;
+def IIC_FPGeneral : InstrItinClass;
+def IIC_FPAddSub : InstrItinClass;
+def IIC_FPCompare : InstrItinClass;
+def IIC_FPDivD : InstrItinClass;
+def IIC_FPDivS : InstrItinClass;
+def IIC_FPFused : InstrItinClass;
+def IIC_FPRes : InstrItinClass;
+def IIC_FPSqrtD : InstrItinClass;
+def IIC_FPSqrtS : InstrItinClass;
+def IIC_VecGeneral : InstrItinClass;
+def IIC_VecFP : InstrItinClass;
+def IIC_VecFPCompare : InstrItinClass;
+def IIC_VecComplex : InstrItinClass;
+def IIC_VecPerm : InstrItinClass;
+def IIC_VecFPRound : InstrItinClass;
+def IIC_VecVSL : InstrItinClass;
+def IIC_VecVSR : InstrItinClass;
+def IIC_SprMTMSRD : InstrItinClass;
+def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBIE : InstrItinClass;
+def IIC_SprSLBMTE : InstrItinClass;
+def IIC_SprSLBMFEE : InstrItinClass;
+def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIEL : InstrItinClass;
+def IIC_SprTLBIE : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
@@ -125,6 +117,7 @@ include "PPCSchedule440.td"
include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
+include "PPCScheduleP7.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
@@ -136,392 +129,392 @@ include "PPCScheduleE5500.td"
//
// opcode itinerary class
// ====== ===============
-// add IntSimple
-// addc IntGeneral
-// adde IntGeneral
-// addi IntSimple
-// addic IntGeneral
-// addic. IntGeneral
-// addis IntSimple
-// addme IntGeneral
-// addze IntGeneral
-// and IntSimple
-// andc IntSimple
-// andi. IntGeneral
-// andis. IntGeneral
-// b BrB
-// bc BrB
-// bcctr BrB
-// bclr BrB
-// cmp IntCompare
-// cmpi IntCompare
-// cmpl IntCompare
-// cmpli IntCompare
-// cntlzd IntRotateD
-// cntlzw IntGeneral
-// crand BrCR
-// crandc BrCR
-// creqv BrCR
-// crnand BrCR
-// crnor BrCR
-// cror BrCR
-// crorc BrCR
-// crxor BrCR
-// dcba LdStDCBA
-// dcbf LdStDCBF
-// dcbi LdStDCBI
-// dcbst LdStDCBF
-// dcbt LdStLoad
-// dcbtst LdStLoad
-// dcbz LdStDCBF
-// divd IntDivD
-// divdu IntDivD
-// divw IntDivW
-// divwu IntDivW
-// dss LdStDSS
-// dst LdStDSS
-// dstst LdStDSS
-// eciwx LdStLoad
-// ecowx LdStLoad
-// eieio LdStLoad
-// eqv IntSimple
-// extsb IntSimple
-// extsh IntSimple
-// extsw IntSimple
-// fabs FPGeneral
-// fadd FPAddSub
-// fadds FPGeneral
-// fcfid FPGeneral
-// fcmpo FPCompare
-// fcmpu FPCompare
-// fctid FPGeneral
-// fctidz FPGeneral
-// fctiw FPGeneral
-// fctiwz FPGeneral
-// fdiv FPDivD
-// fdivs FPDivS
-// fmadd FPFused
-// fmadds FPGeneral
-// fmr FPGeneral
-// fmsub FPFused
-// fmsubs FPGeneral
-// fmul FPFused
-// fmuls FPGeneral
-// fnabs FPGeneral
-// fneg FPGeneral
-// fnmadd FPFused
-// fnmadds FPGeneral
-// fnmsub FPFused
-// fnmsubs FPGeneral
-// fres FPRes
-// frsp FPGeneral
-// frsqrte FPGeneral
-// fsel FPGeneral
-// fsqrt FPSqrt
-// fsqrts FPSqrt
-// fsub FPAddSub
-// fsubs FPGeneral
-// icbi LdStICBI
-// isync SprISYNC
-// lbz LdStLoad
-// lbzu LdStLoadUpd
-// lbzux LdStLoadUpd
-// lbzx LdStLoad
-// ld LdStLD
-// ldarx LdStLDARX
-// ldu LdStLDU
-// ldux LdStLDU
-// ldx LdStLD
-// lfd LdStLFD
-// lfdu LdStLFDU
-// lfdux LdStLFDU
-// lfdx LdStLFD
-// lfs LdStLFD
-// lfsu LdStLFDU
-// lfsux LdStLFDU
-// lfsx LdStLFD
-// lha LdStLHA
-// lhau LdStLHAU
-// lhaux LdStLHAU
-// lhax LdStLHA
-// lhbrx LdStLoad
-// lhz LdStLoad
-// lhzu LdStLoadUpd
-// lhzux LdStLoadUpd
-// lhzx LdStLoad
-// lmw LdStLMW
-// lswi LdStLMW
-// lswx LdStLMW
-// lvebx LdStLVecX
-// lvehx LdStLVecX
-// lvewx LdStLVecX
-// lvsl LdStLVecX
-// lvsr LdStLVecX
-// lvx LdStLVecX
-// lvxl LdStLVecX
-// lwa LdStLWA
-// lwarx LdStLWARX
-// lwaux LdStLHAU
-// lwax LdStLHA
-// lwbrx LdStLoad
-// lwz LdStLoad
-// lwzu LdStLoadUpd
-// lwzux LdStLoadUpd
-// lwzx LdStLoad
-// mcrf BrMCR
-// mcrfs FPGeneral
-// mcrxr BrMCRX
-// mfcr SprMFCR
-// mffs IntMFFS
-// mfmsr SprMFMSR
-// mfspr SprMFSPR
-// mfsr SprMFSR
-// mfsrin SprMFSR
-// mftb SprMFTB
-// mfvscr IntMFVSCR
-// mtcrf BrMCRX
-// mtfsb0 IntMTFSB0
-// mtfsb1 IntMTFSB0
-// mtfsf IntMTFSB0
-// mtfsfi IntMTFSB0
-// mtmsr SprMTMSR
-// mtmsrd LdStLD
-// mtspr SprMTSPR
-// mtsr SprMTSR
-// mtsrd IntMTSRD
-// mtsrdin IntMTSRD
-// mtsrin SprMTSRIN
-// mtvscr IntMFVSCR
-// mulhd IntMulHD
-// mulhdu IntMulHD
-// mulhw IntMulHW
-// mulhwu IntMulHWU
-// mulld IntMulHD
-// mulli IntMulLI
-// mullw IntMulHW
-// nand IntSimple
-// neg IntSimple
-// nor IntSimple
-// or IntSimple
-// orc IntSimple
-// ori IntSimple
-// oris IntSimple
-// rfi SprRFI
-// rfid IntRFID
-// rldcl IntRotateD
-// rldcr IntRotateD
-// rldic IntRotateDI
-// rldicl IntRotateDI
-// rldicr IntRotateDI
-// rldimi IntRotateDI
-// rlwimi IntRotate
-// rlwinm IntGeneral
-// rlwnm IntGeneral
-// sc SprSC
-// slbia LdStSLBIA
-// slbie LdStSLBIE
-// sld IntRotateD
-// slw IntGeneral
-// srad IntRotateD
-// sradi IntRotateDI
-// sraw IntShift
-// srawi IntShift
-// srd IntRotateD
-// srw IntGeneral
-// stb LdStStore
-// stbu LdStStoreUpd
-// stbux LdStStoreUpd
-// stbx LdStStore
-// std LdStSTD
-// stdcx. LdStSTDCX
-// stdu LdStSTDU
-// stdux LdStSTDU
-// stdx LdStSTD
-// stfd LdStSTFD
-// stfdu LdStSTFDU
-// stfdux LdStSTFDU
-// stfdx LdStSTFD
-// stfiwx LdStSTFD
-// stfs LdStSTFD
-// stfsu LdStSTFDU
-// stfsux LdStSTFDU
-// stfsx LdStSTFD
-// sth LdStStore
-// sthbrx LdStStore
-// sthu LdStStoreUpd
-// sthux LdStStoreUpd
-// sthx LdStStore
-// stmw LdStLMW
-// stswi LdStLMW
-// stswx LdStLMW
-// stvebx LdStSTVEBX
-// stvehx LdStSTVEBX
-// stvewx LdStSTVEBX
-// stvx LdStSTVEBX
-// stvxl LdStSTVEBX
-// stw LdStStore
-// stwbrx LdStStore
-// stwcx. LdStSTWCX
-// stwu LdStStoreUpd
-// stwux LdStStoreUpd
-// stwx LdStStore
-// subf IntGeneral
-// subfc IntGeneral
-// subfe IntGeneral
-// subfic IntGeneral
-// subfme IntGeneral
-// subfze IntGeneral
-// sync LdStSync
-// td IntTrapD
-// tdi IntTrapD
-// tlbia LdStSLBIA
-// tlbie LdStDCBF
-// tlbsync SprTLBSYNC
-// tw IntTrapW
-// twi IntTrapW
-// vaddcuw VecGeneral
-// vaddfp VecFP
-// vaddsbs VecGeneral
-// vaddshs VecGeneral
-// vaddsws VecGeneral
-// vaddubm VecGeneral
-// vaddubs VecGeneral
-// vadduhm VecGeneral
-// vadduhs VecGeneral
-// vadduwm VecGeneral
-// vadduws VecGeneral
-// vand VecGeneral
-// vandc VecGeneral
-// vavgsb VecGeneral
-// vavgsh VecGeneral
-// vavgsw VecGeneral
-// vavgub VecGeneral
-// vavguh VecGeneral
-// vavguw VecGeneral
-// vcfsx VecFP
-// vcfux VecFP
-// vcmpbfp VecFPCompare
-// vcmpeqfp VecFPCompare
-// vcmpequb VecGeneral
-// vcmpequh VecGeneral
-// vcmpequw VecGeneral
-// vcmpgefp VecFPCompare
-// vcmpgtfp VecFPCompare
-// vcmpgtsb VecGeneral
-// vcmpgtsh VecGeneral
-// vcmpgtsw VecGeneral
-// vcmpgtub VecGeneral
-// vcmpgtuh VecGeneral
-// vcmpgtuw VecGeneral
-// vctsxs VecFP
-// vctuxs VecFP
-// vexptefp VecFP
-// vlogefp VecFP
-// vmaddfp VecFP
-// vmaxfp VecFPCompare
-// vmaxsb VecGeneral
-// vmaxsh VecGeneral
-// vmaxsw VecGeneral
-// vmaxub VecGeneral
-// vmaxuh VecGeneral
-// vmaxuw VecGeneral
-// vmhaddshs VecComplex
-// vmhraddshs VecComplex
-// vminfp VecFPCompare
-// vminsb VecGeneral
-// vminsh VecGeneral
-// vminsw VecGeneral
-// vminub VecGeneral
-// vminuh VecGeneral
-// vminuw VecGeneral
-// vmladduhm VecComplex
-// vmrghb VecPerm
-// vmrghh VecPerm
-// vmrghw VecPerm
-// vmrglb VecPerm
-// vmrglh VecPerm
-// vmrglw VecPerm
-// vmsubfp VecFP
-// vmsummbm VecComplex
-// vmsumshm VecComplex
-// vmsumshs VecComplex
-// vmsumubm VecComplex
-// vmsumuhm VecComplex
-// vmsumuhs VecComplex
-// vmulesb VecComplex
-// vmulesh VecComplex
-// vmuleub VecComplex
-// vmuleuh VecComplex
-// vmulosb VecComplex
-// vmulosh VecComplex
-// vmuloub VecComplex
-// vmulouh VecComplex
-// vnor VecGeneral
-// vor VecGeneral
-// vperm VecPerm
-// vpkpx VecPerm
-// vpkshss VecPerm
-// vpkshus VecPerm
-// vpkswss VecPerm
-// vpkswus VecPerm
-// vpkuhum VecPerm
-// vpkuhus VecPerm
-// vpkuwum VecPerm
-// vpkuwus VecPerm
-// vrefp VecFPRound
-// vrfim VecFPRound
-// vrfin VecFPRound
-// vrfip VecFPRound
-// vrfiz VecFPRound
-// vrlb VecGeneral
-// vrlh VecGeneral
-// vrlw VecGeneral
-// vrsqrtefp VecFP
-// vsel VecGeneral
-// vsl VecVSL
-// vslb VecGeneral
-// vsldoi VecPerm
-// vslh VecGeneral
-// vslo VecPerm
-// vslw VecGeneral
-// vspltb VecPerm
-// vsplth VecPerm
-// vspltisb VecPerm
-// vspltish VecPerm
-// vspltisw VecPerm
-// vspltw VecPerm
-// vsr VecVSR
-// vsrab VecGeneral
-// vsrah VecGeneral
-// vsraw VecGeneral
-// vsrb VecGeneral
-// vsrh VecGeneral
-// vsro VecPerm
-// vsrw VecGeneral
-// vsubcuw VecGeneral
-// vsubfp VecFP
-// vsubsbs VecGeneral
-// vsubshs VecGeneral
-// vsubsws VecGeneral
-// vsububm VecGeneral
-// vsububs VecGeneral
-// vsubuhm VecGeneral
-// vsubuhs VecGeneral
-// vsubuwm VecGeneral
-// vsubuws VecGeneral
-// vsum2sws VecComplex
-// vsum4sbs VecComplex
-// vsum4shs VecComplex
-// vsum4ubs VecComplex
-// vsumsws VecComplex
-// vupkhpx VecPerm
-// vupkhsb VecPerm
-// vupkhsh VecPerm
-// vupklpx VecPerm
-// vupklsb VecPerm
-// vupklsh VecPerm
-// vxor VecGeneral
-// xor IntSimple
-// xori IntSimple
-// xoris IntSimple
+// add IIC_IntSimple
+// addc IIC_IntGeneral
+// adde IIC_IntGeneral
+// addi IIC_IntSimple
+// addic IIC_IntGeneral
+// addic. IIC_IntGeneral
+// addis IIC_IntSimple
+// addme IIC_IntGeneral
+// addze IIC_IntGeneral
+// and IIC_IntSimple
+// andc IIC_IntSimple
+// andi. IIC_IntGeneral
+// andis. IIC_IntGeneral
+// b IIC_BrB
+// bc IIC_BrB
+// bcctr IIC_BrB
+// bclr IIC_BrB
+// cmp IIC_IntCompare
+// cmpi IIC_IntCompare
+// cmpl IIC_IntCompare
+// cmpli IIC_IntCompare
+// cntlzd IIC_IntRotateD
+// cntlzw IIC_IntGeneral
+// crand IIC_BrCR
+// crandc IIC_BrCR
+// creqv IIC_BrCR
+// crnand IIC_BrCR
+// crnor IIC_BrCR
+// cror IIC_BrCR
+// crorc IIC_BrCR
+// crxor IIC_BrCR
+// dcba IIC_LdStDCBA
+// dcbf IIC_LdStDCBF
+// dcbi IIC_LdStDCBI
+// dcbst IIC_LdStDCBF
+// dcbt IIC_LdStLoad
+// dcbtst IIC_LdStLoad
+// dcbz IIC_LdStDCBF
+// divd IIC_IntDivD
+// divdu IIC_IntDivD
+// divw IIC_IntDivW
+// divwu IIC_IntDivW
+// dss IIC_LdStDSS
+// dst IIC_LdStDSS
+// dstst IIC_LdStDSS
+// eciwx IIC_LdStLoad
+// ecowx IIC_LdStLoad
+// eieio IIC_LdStLoad
+// eqv IIC_IntSimple
+// extsb IIC_IntSimple
+// extsh IIC_IntSimple
+// extsw IIC_IntSimple
+// fabs IIC_FPGeneral
+// fadd IIC_FPAddSub
+// fadds IIC_FPGeneral
+// fcfid IIC_FPGeneral
+// fcmpo IIC_FPCompare
+// fcmpu IIC_FPCompare
+// fctid IIC_FPGeneral
+// fctidz IIC_FPGeneral
+// fctiw IIC_FPGeneral
+// fctiwz IIC_FPGeneral
+// fdiv IIC_FPDivD
+// fdivs IIC_FPDivS
+// fmadd IIC_FPFused
+// fmadds IIC_FPGeneral
+// fmr IIC_FPGeneral
+// fmsub IIC_FPFused
+// fmsubs IIC_FPGeneral
+// fmul IIC_FPFused
+// fmuls IIC_FPGeneral
+// fnabs IIC_FPGeneral
+// fneg IIC_FPGeneral
+// fnmadd IIC_FPFused
+// fnmadds IIC_FPGeneral
+// fnmsub IIC_FPFused
+// fnmsubs IIC_FPGeneral
+// fres IIC_FPRes
+// frsp IIC_FPGeneral
+// frsqrte IIC_FPGeneral
+// fsel IIC_FPGeneral
+// fsqrt IIC_FPSqrtD
+// fsqrts IIC_FPSqrtS
+// fsub IIC_FPAddSub
+// fsubs IIC_FPGeneral
+// icbi IIC_LdStICBI
+// isync IIC_SprISYNC
+// lbz IIC_LdStLoad
+// lbzu IIC_LdStLoadUpd
+// lbzux IIC_LdStLoadUpdX
+// lbzx IIC_LdStLoad
+// ld IIC_LdStLD
+// ldarx IIC_LdStLDARX
+// ldu IIC_LdStLDU
+// ldux IIC_LdStLDUX
+// ldx IIC_LdStLD
+// lfd IIC_LdStLFD
+// lfdu IIC_LdStLFDU
+// lfdux IIC_LdStLFDUX
+// lfdx IIC_LdStLFD
+// lfs IIC_LdStLFD
+// lfsu IIC_LdStLFDU
+// lfsux IIC_LdStLFDUX
+// lfsx IIC_LdStLFD
+// lha IIC_LdStLHA
+// lhau IIC_LdStLHAU
+// lhaux IIC_LdStLHAUX
+// lhax IIC_LdStLHA
+// lhbrx IIC_LdStLoad
+// lhz IIC_LdStLoad
+// lhzu IIC_LdStLoadUpd
+// lhzux IIC_LdStLoadUpdX
+// lhzx IIC_LdStLoad
+// lmw IIC_LdStLMW
+// lswi IIC_LdStLMW
+// lswx IIC_LdStLMW
+// lvebx IIC_LdStLVecX
+// lvehx IIC_LdStLVecX
+// lvewx IIC_LdStLVecX
+// lvsl IIC_LdStLVecX
+// lvsr IIC_LdStLVecX
+// lvx IIC_LdStLVecX
+// lvxl IIC_LdStLVecX
+// lwa IIC_LdStLWA
+// lwarx IIC_LdStLWARX
+// lwaux IIC_LdStLHAUX
+// lwax IIC_LdStLHA
+// lwbrx IIC_LdStLoad
+// lwz IIC_LdStLoad
+// lwzu IIC_LdStLoadUpd
+// lwzux IIC_LdStLoadUpdX
+// lwzx IIC_LdStLoad
+// mcrf IIC_BrMCR
+// mcrfs IIC_FPGeneral
+// mcrxr IIC_BrMCRX
+// mfcr IIC_SprMFCR
+// mffs IIC_IntMFFS
+// mfmsr IIC_SprMFMSR
+// mfspr IIC_SprMFSPR
+// mfsr IIC_SprMFSR
+// mfsrin IIC_SprMFSR
+// mftb IIC_SprMFTB
+// mfvscr IIC_IntMFVSCR
+// mtcrf IIC_BrMCRX
+// mtfsb0 IIC_IntMTFSB0
+// mtfsb1 IIC_IntMTFSB0
+// mtfsf IIC_IntMTFSB0
+// mtfsfi IIC_IntMTFSB0
+// mtmsr IIC_SprMTMSR
+// mtmsrd IIC_LdStLD
+// mtspr IIC_SprMTSPR
+// mtsr IIC_SprMTSR
+// mtsrd IIC_IntMTSRD
+// mtsrdin IIC_IntMTSRD
+// mtsrin IIC_SprMTSRIN
+// mtvscr IIC_IntMFVSCR
+// mulhd IIC_IntMulHD
+// mulhdu IIC_IntMulHD
+// mulhw IIC_IntMulHW
+// mulhwu IIC_IntMulHWU
+// mulld IIC_IntMulHD
+// mulli IIC_IntMulLI
+// mullw IIC_IntMulHW
+// nand IIC_IntSimple
+// neg IIC_IntSimple
+// nor IIC_IntSimple
+// or IIC_IntSimple
+// orc IIC_IntSimple
+// ori IIC_IntSimple
+// oris IIC_IntSimple
+// rfi IIC_SprRFI
+// rfid IIC_IntRFID
+// rldcl IIC_IntRotateD
+// rldcr IIC_IntRotateD
+// rldic IIC_IntRotateDI
+// rldicl IIC_IntRotateDI
+// rldicr IIC_IntRotateDI
+// rldimi IIC_IntRotateDI
+// rlwimi IIC_IntRotate
+// rlwinm IIC_IntGeneral
+// rlwnm IIC_IntGeneral
+// sc IIC_SprSC
+// slbia IIC_LdStSLBIA
+// slbie IIC_LdStSLBIE
+// sld IIC_IntRotateD
+// slw IIC_IntGeneral
+// srad IIC_IntRotateD
+// sradi IIC_IntRotateDI
+// sraw IIC_IntShift
+// srawi IIC_IntShift
+// srd IIC_IntRotateD
+// srw IIC_IntGeneral
+// stb IIC_LdStStore
+// stbu IIC_LdStStoreUpd
+// stbux IIC_LdStStoreUpd
+// stbx IIC_LdStStore
+// std IIC_LdStSTD
+// stdcx. IIC_LdStSTDCX
+// stdu IIC_LdStSTDU
+// stdux IIC_LdStSTDUX
+// stdx IIC_LdStSTD
+// stfd IIC_LdStSTFD
+// stfdu IIC_LdStSTFDU
+// stfdux IIC_LdStSTFDU
+// stfdx IIC_LdStSTFD
+// stfiwx IIC_LdStSTFD
+// stfs IIC_LdStSTFD
+// stfsu IIC_LdStSTFDU
+// stfsux IIC_LdStSTFDU
+// stfsx IIC_LdStSTFD
+// sth IIC_LdStStore
+// sthbrx IIC_LdStStore
+// sthu IIC_LdStStoreUpd
+// sthux IIC_LdStStoreUpd
+// sthx IIC_LdStStore
+// stmw IIC_LdStLMW
+// stswi IIC_LdStLMW
+// stswx IIC_LdStLMW
+// stvebx IIC_LdStSTVEBX
+// stvehx IIC_LdStSTVEBX
+// stvewx IIC_LdStSTVEBX
+// stvx IIC_LdStSTVEBX
+// stvxl IIC_LdStSTVEBX
+// stw IIC_LdStStore
+// stwbrx IIC_LdStStore
+// stwcx. IIC_LdStSTWCX
+// stwu IIC_LdStStoreUpd
+// stwux IIC_LdStStoreUpd
+// stwx IIC_LdStStore
+// subf IIC_IntGeneral
+// subfc IIC_IntGeneral
+// subfe IIC_IntGeneral
+// subfic IIC_IntGeneral
+// subfme IIC_IntGeneral
+// subfze IIC_IntGeneral
+// sync IIC_LdStSync
+// td IIC_IntTrapD
+// tdi IIC_IntTrapD
+// tlbia IIC_LdStSLBIA
+// tlbie IIC_LdStDCBF
+// tlbsync IIC_SprTLBSYNC
+// tw IIC_IntTrapW
+// twi IIC_IntTrapW
+// vaddcuw IIC_VecGeneral
+// vaddfp IIC_VecFP
+// vaddsbs IIC_VecGeneral
+// vaddshs IIC_VecGeneral
+// vaddsws IIC_VecGeneral
+// vaddubm IIC_VecGeneral
+// vaddubs IIC_VecGeneral
+// vadduhm IIC_VecGeneral
+// vadduhs IIC_VecGeneral
+// vadduwm IIC_VecGeneral
+// vadduws IIC_VecGeneral
+// vand IIC_VecGeneral
+// vandc IIC_VecGeneral
+// vavgsb IIC_VecGeneral
+// vavgsh IIC_VecGeneral
+// vavgsw IIC_VecGeneral
+// vavgub IIC_VecGeneral
+// vavguh IIC_VecGeneral
+// vavguw IIC_VecGeneral
+// vcfsx IIC_VecFP
+// vcfux IIC_VecFP
+// vcmpbfp IIC_VecFPCompare
+// vcmpeqfp IIC_VecFPCompare
+// vcmpequb IIC_VecGeneral
+// vcmpequh IIC_VecGeneral
+// vcmpequw IIC_VecGeneral
+// vcmpgefp IIC_VecFPCompare
+// vcmpgtfp IIC_VecFPCompare
+// vcmpgtsb IIC_VecGeneral
+// vcmpgtsh IIC_VecGeneral
+// vcmpgtsw IIC_VecGeneral
+// vcmpgtub IIC_VecGeneral
+// vcmpgtuh IIC_VecGeneral
+// vcmpgtuw IIC_VecGeneral
+// vctsxs IIC_VecFP
+// vctuxs IIC_VecFP
+// vexptefp IIC_VecFP
+// vlogefp IIC_VecFP
+// vmaddfp IIC_VecFP
+// vmaxfp IIC_VecFPCompare
+// vmaxsb IIC_VecGeneral
+// vmaxsh IIC_VecGeneral
+// vmaxsw IIC_VecGeneral
+// vmaxub IIC_VecGeneral
+// vmaxuh IIC_VecGeneral
+// vmaxuw IIC_VecGeneral
+// vmhaddshs IIC_VecComplex
+// vmhraddshs IIC_VecComplex
+// vminfp IIC_VecFPCompare
+// vminsb IIC_VecGeneral
+// vminsh IIC_VecGeneral
+// vminsw IIC_VecGeneral
+// vminub IIC_VecGeneral
+// vminuh IIC_VecGeneral
+// vminuw IIC_VecGeneral
+// vmladduhm IIC_VecComplex
+// vmrghb IIC_VecPerm
+// vmrghh IIC_VecPerm
+// vmrghw IIC_VecPerm
+// vmrglb IIC_VecPerm
+// vmrglh IIC_VecPerm
+// vmrglw IIC_VecPerm
+// vmsubfp IIC_VecFP
+// vmsummbm IIC_VecComplex
+// vmsumshm IIC_VecComplex
+// vmsumshs IIC_VecComplex
+// vmsumubm IIC_VecComplex
+// vmsumuhm IIC_VecComplex
+// vmsumuhs IIC_VecComplex
+// vmulesb IIC_VecComplex
+// vmulesh IIC_VecComplex
+// vmuleub IIC_VecComplex
+// vmuleuh IIC_VecComplex
+// vmulosb IIC_VecComplex
+// vmulosh IIC_VecComplex
+// vmuloub IIC_VecComplex
+// vmulouh IIC_VecComplex
+// vnor IIC_VecGeneral
+// vor IIC_VecGeneral
+// vperm IIC_VecPerm
+// vpkpx IIC_VecPerm
+// vpkshss IIC_VecPerm
+// vpkshus IIC_VecPerm
+// vpkswss IIC_VecPerm
+// vpkswus IIC_VecPerm
+// vpkuhum IIC_VecPerm
+// vpkuhus IIC_VecPerm
+// vpkuwum IIC_VecPerm
+// vpkuwus IIC_VecPerm
+// vrefp IIC_VecFPRound
+// vrfim IIC_VecFPRound
+// vrfin IIC_VecFPRound
+// vrfip IIC_VecFPRound
+// vrfiz IIC_VecFPRound
+// vrlb IIC_VecGeneral
+// vrlh IIC_VecGeneral
+// vrlw IIC_VecGeneral
+// vrsqrtefp IIC_VecFP
+// vsel IIC_VecGeneral
+// vsl IIC_VecVSL
+// vslb IIC_VecGeneral
+// vsldoi IIC_VecPerm
+// vslh IIC_VecGeneral
+// vslo IIC_VecPerm
+// vslw IIC_VecGeneral
+// vspltb IIC_VecPerm
+// vsplth IIC_VecPerm
+// vspltisb IIC_VecPerm
+// vspltish IIC_VecPerm
+// vspltisw IIC_VecPerm
+// vspltw IIC_VecPerm
+// vsr IIC_VecVSR
+// vsrab IIC_VecGeneral
+// vsrah IIC_VecGeneral
+// vsraw IIC_VecGeneral
+// vsrb IIC_VecGeneral
+// vsrh IIC_VecGeneral
+// vsro IIC_VecPerm
+// vsrw IIC_VecGeneral
+// vsubcuw IIC_VecGeneral
+// vsubfp IIC_VecFP
+// vsubsbs IIC_VecGeneral
+// vsubshs IIC_VecGeneral
+// vsubsws IIC_VecGeneral
+// vsububm IIC_VecGeneral
+// vsububs IIC_VecGeneral
+// vsubuhm IIC_VecGeneral
+// vsubuhs IIC_VecGeneral
+// vsubuwm IIC_VecGeneral
+// vsubuws IIC_VecGeneral
+// vsum2sws IIC_VecComplex
+// vsum4sbs IIC_VecComplex
+// vsum4shs IIC_VecComplex
+// vsum4ubs IIC_VecComplex
+// vsumsws IIC_VecComplex
+// vupkhpx IIC_VecPerm
+// vupkhsb IIC_VecPerm
+// vupkhsh IIC_VecPerm
+// vupklpx IIC_VecPerm
+// vupklsb IIC_VecPerm
+// vupklsh IIC_VecPerm
+// vxor IIC_VecGeneral
+// xor IIC_IntSimple
+// xori IIC_IntSimple
+// xoris IIC_IntSimple
//
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 37b6eac10cfe..218fed248a31 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -26,43 +26,39 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC 440/450 chip sets
//
-def IFTH1 : FuncUnit; // Fetch unit 1
-def IFTH2 : FuncUnit; // Fetch unit 2
-def PDCD1 : FuncUnit; // Decode unit 1
-def PDCD2 : FuncUnit; // Decode unit 2
-def DISS1 : FuncUnit; // Issue unit 1
-def DISS2 : FuncUnit; // Issue unit 2
-def LRACC : FuncUnit; // Register access and dispatch for
- // the simple integer (J-pipe) and
- // load/store (L-pipe) pipelines
-def IRACC : FuncUnit; // Register access and dispatch for
- // the complex integer (I-pipe) pipeline
-def FRACC : FuncUnit; // Register access and dispatch for
- // the floating-point execution (F-pipe) pipeline
-def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
-def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
-def IWB : FuncUnit; // Write-back unit for the I pipeline
-def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
-def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
-def JWB : FuncUnit; // Write-back unit for the J pipeline
-def AGEN : FuncUnit; // Address generation for the L pipeline
-def CRD : FuncUnit; // D-cache access for the L pipeline
-def LWB : FuncUnit; // Write-back unit for the L pipeline
-def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
-def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
-def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
-def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
-def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
-def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
-def FWB : FuncUnit; // Write-back unit for the F pipeline
+def P440_DISS1 : FuncUnit; // Issue unit 1
+def P440_DISS2 : FuncUnit; // Issue unit 2
+def P440_LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def P440_IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def P440_FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def P440_IWB : FuncUnit; // Write-back unit for the I pipeline
+def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def P440_JWB : FuncUnit; // Write-back unit for the J pipeline
+def P440_AGEN : FuncUnit; // Address generation for the L pipeline
+def P440_CRD : FuncUnit; // D-cache access for the L pipeline
+def P440_LWB : FuncUnit; // Write-back unit for the L pipeline
+def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def P440_FWB : FuncUnit; // Write-back unit for the F pipeline
-def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
- // to make sure that no lwarx/stwcx.
- // instructions are issued while another
- // lwarx/stwcx. is in the L pipe.
+def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
-def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
-def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs.
// Notes:
// Instructions are held in the FRACC, LRACC and IRACC pipeline
@@ -104,560 +100,500 @@ def FPR_Bypass : Bypass; // The bypass for floating-point regs.
def PPC440Itineraries : ProcessorItineraries<
- [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
- IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
- FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
- [GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<33, [IWB]>],
- [40, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC, LRACC]>,
- InstrStage<1, [IEXE1, JEXE1]>,
- InstrStage<1, [IEXE2, JEXE2]>,
- InstrStage<1, [IWB, JWB]>],
- [6, 4, 4],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4, 4],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [9, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [9, 5, 5],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<2, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1]>,
- InstrStage<1, [IRACC], 0>,
- InstrStage<4, [LWARX_Hold], 0>,
- InstrStage<1, [LRACC]>,
- InstrStage<1, [AGEN]>,
- InstrStage<1, [CRD]>,
- InstrStage<1, [LWB]>],
- [8, 5],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [LRACC]>,
- InstrStage<3, [AGEN], 1>,
- InstrStage<2, [CRD], 1>,
- InstrStage<1, [LWB]>]>,
- InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC], 0>,
- InstrStage<1, [LRACC], 0>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [FEXE1], 0>,
- InstrStage<1, [AGEN], 0>,
- InstrStage<1, [JEXE1], 0>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [FEXE2], 0>,
- InstrStage<1, [CRD], 0>,
- InstrStage<1, [JEXE2], 0>,
- InstrStage<1, [IEXE2]>,
- InstrStage<6, [FEXE3], 0>,
- InstrStage<6, [LWB], 0>,
- InstrStage<6, [JWB], 0>,
- InstrStage<6, [IWB]>]>,
- InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [6, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [9, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [7, 4],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<3, [IWB]>],
- [10, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [IRACC]>,
- InstrStage<1, [IEXE1]>,
- InstrStage<1, [IEXE2]>,
- InstrStage<1, [IWB]>],
- [8, 4],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<25, [FWB]>],
- [35, 4, 4],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<13, [FWB]>],
- [23, 4, 4],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4, 4, 4],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
- InstrStage<1, [PDCD1, PDCD2]>,
- InstrStage<1, [DISS1, DISS2]>,
- InstrStage<1, [FRACC]>,
- InstrStage<1, [FEXE1]>,
- InstrStage<1, [FEXE2]>,
- InstrStage<1, [FEXE3]>,
- InstrStage<1, [FEXE4]>,
- InstrStage<1, [FEXE5]>,
- InstrStage<1, [FEXE6]>,
- InstrStage<1, [FWB]>],
- [10, 4],
- [FPR_Bypass, FPR_Bypass]>
+ [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2,
+ P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD,
+ P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5,
+ P440_FEXE6, P440_FWB, P440_LWARX_Hold],
+ [P440_GPR_Bypass, P440_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<33, [P440_IWB]>],
+ [36, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<3, [P440_AGEN], 1>,
+ InstrStage<2, [P440_CRD], 1>,
+ InstrStage<1, [P440_LWB]>]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC], 0>,
+ InstrStage<1, [P440_LRACC], 0>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_FEXE1], 0>,
+ InstrStage<1, [P440_AGEN], 0>,
+ InstrStage<1, [P440_JEXE1], 0>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_FEXE2], 0>,
+ InstrStage<1, [P440_CRD], 0>,
+ InstrStage<1, [P440_JEXE2], 0>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<6, [P440_FEXE3], 0>,
+ InstrStage<6, [P440_LWB], 0>,
+ InstrStage<6, [P440_JWB], 0>,
+ InstrStage<6, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [5, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<25, [P440_FWB]>],
+ [31, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<13, [P440_FWB]>],
+ [19, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// PPC440 machine model for scheduling and other instruction cost heuristics.
+
+def PPC440Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 instructions are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPC440Itineraries;
+}
+
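For context, a SchedMachineModel does nothing until a processor definition selects it. A minimal sketch of how PPC.td could wire up the new PPC440Model, assuming the stock ProcessorModel class from Target.td and an illustrative (not verified against this tree) feature list:

    // Hypothetical processor entry; Directive440/FeatureISEL stand in for
    // whatever features PPC.td actually attaches to the 440.
    def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL]>;

The subtarget then reaches PPC440Itineraries indirectly through the model's Itineraries field, so the per-operand cycle data above stays available whenever the scheduler queries itineraries instead of the flat LoadLatency.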
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 1612cd2a0b84..14476963bad0 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,8 +14,8 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC A2 chip sets
//
-def XU : FuncUnit; // XU pipeline
-def FU : FuncUnit; // FI pipeline
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FP pipeline
//
// This file defines the itinerary class data for the PPC A2 processor.
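The prefixing pattern in this hunk is the point of the whole change: TableGen records share one global namespace, so once several PPCSchedule*.td files are included into the same compilation, two of them cannot both define a unit named XU. A minimal sketch of the collision the rename avoids (file names illustrative):

    // In one schedule file:
    def XU : FuncUnit;
    // In a second schedule file included alongside it:
    def XU : FuncUnit;   // TableGen error: 'XU' is already defined

Prefixing every functional unit and bypass with its CPU (A2_, P440_, E500_, E5500_) keeps the records distinct.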
@@ -24,126 +24,140 @@ def FU : FuncUnit; // FI pipeline
def PPCA2Itineraries : ProcessorItineraries<
- [XU, FU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<IntGeneral , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntCompare , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntDivW , [InstrStage<1, [XU]>],
- [39, 1, 1]>,
- InstrItinData<IntDivD , [InstrStage<1, [XU]>],
- [71, 1, 1]>,
- InstrItinData<IntMulHW , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<IntMulLI , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<IntRotate , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntRotateD , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntShift , [InstrStage<1, [XU]>],
- [2, 1, 1]>,
- InstrItinData<IntTrapW , [InstrStage<1, [XU]>],
- [2, 1]>,
- InstrItinData<IntTrapD , [InstrStage<1, [XU]>],
- [2, 1]>,
- InstrItinData<BrB , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<BrCR , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<BrMCR , [InstrStage<1, [XU]>],
- [5, 1, 1]>,
- InstrItinData<BrMCRX , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStLoad , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
- [6, 8, 1, 1]>,
- InstrItinData<LdStLDU , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStStore , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStICBI, [InstrStage<1, [XU]>],
- [16, 1, 1]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStLFD , [InstrStage<1, [XU]>],
- [7, 1, 1]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [XU]>],
- [7, 9, 1, 1]>,
- InstrItinData<LdStLHA , [InstrStage<1, [XU]>],
- [6, 1, 1]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [XU]>],
- [6, 8, 1, 1]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSTD , [InstrStage<1, [XU]>],
- [1, 1, 1]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [XU]>],
- [2, 1, 1, 1]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>],
- [82, 1, 1]>, // L2 latency
- InstrItinData<LdStSync , [InstrStage<1, [XU]>],
- [6]>,
- InstrItinData<SprISYNC , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [XU]>],
- [16, 1]>,
- InstrItinData<SprMFCR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [XU]>],
- [4, 1]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprMFTB , [InstrStage<1, [XU]>],
- [4, 1]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [XU]>],
- [6, 1]>,
- InstrItinData<SprRFI , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<SprSC , [InstrStage<1, [XU]>],
- [16]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FU]>],
- [6, 1, 1]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FU]>],
- [6, 1, 1]>,
- InstrItinData<FPCompare , [InstrStage<1, [FU]>],
- [5, 1, 1]>,
- InstrItinData<FPDivD , [InstrStage<1, [FU]>],
- [72, 1, 1]>,
- InstrItinData<FPDivS , [InstrStage<1, [FU]>],
- [59, 1, 1]>,
- InstrItinData<FPSqrt , [InstrStage<1, [FU]>],
- [69, 1, 1]>,
- InstrItinData<FPFused , [InstrStage<1, [FU]>],
- [6, 1, 1, 1]>,
- InstrItinData<FPRes , [InstrStage<1, [FU]>],
- [6, 1]>
+ [A2_XU, A2_FU], [], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
+ [39, 0, 0]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [A2_XU]>],
+ [71, 0, 0]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_IntTrapD, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
+ [16, 0, 0]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>],
+ [7, 0, 0]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [A2_XU]>],
+ [6]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [A2_XU]>],
+ [16, 0]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [A2_XU]>],
+ [1, 0]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [A2_FU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [A2_FU]>],
+ [72, 0, 0]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [A2_FU]>],
+ [59, 0, 0]>,
+ InstrItinData<IIC_FPSqrtD, [InstrStage<1, [A2_FU]>],
+ [69, 0, 0]>,
+ InstrItinData<IIC_FPSqrtS, [InstrStage<1, [A2_FU]>],
+ [65, 0, 0]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0, 0]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [A2_FU]>],
+ [6, 0]>
]>;
// ===---------------------------------------------------------------------===//
// A2 machine model for scheduling and other instruction cost heuristics.
def PPCA2Model : SchedMachineModel {
- let IssueWidth = 1; // 2 micro-ops are dispatched per cycle.
+ let IssueWidth = 1; // 1 instruction is dispatched per cycle.
let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
let LoadLatency = 6; // Optimistic load latency assuming bypass.
// This is overridden by OperandCycles if the
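A reading note for the e500mc entries below (and the LWARX/STWCX entries above): the optional third InstrStage argument is the cycle offset at which the next stage may start. This is a paraphrase, not a verbatim copy, of the class in include/llvm/Target/TargetItinerary.td:

    class InstrStage<int cycles, list<FuncUnit> units,
                     int timeinc = -1, ReservationKind kind = Required> {
      int Cycles = cycles;          // cycles the chosen unit stays reserved
      list<FuncUnit> Units = units; // any one of these units may be used
      int TimeInc = timeinc;        // cycles until next stage; -1 = after Cycles
      int Kind = kind.Value;        // Required vs. Reserved
    }

So InstrStage<1, [E500_DIS0, E500_DIS1], 0> holds a dispatch slot for one cycle while letting the execute stage begin in the same cycle, which is how the dispatch/execute overlap is modeled throughout the e500mc and e5500 itineraries.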
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index c189b9ed9a6c..dab89e3db353 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -19,238 +19,285 @@
// * Decode & Dispatch
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-def DIS0 : FuncUnit; // Dispatch stage - insn 1
-def DIS1 : FuncUnit; // Dispatch stage - insn 2
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
// * Execute
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
// Some instructions can only execute in SFX0, not SFX1.
// The CFX has a bypass path, allowing non-divide instructions to execute
// while a divide instruction is executed.
-def SFX0 : FuncUnit; // Simple unit 0
-def SFX1 : FuncUnit; // Simple unit 1
-def BU : FuncUnit; // Branch unit
-def CFX_DivBypass
- : FuncUnit; // CFX divide bypass path
-def CFX_0 : FuncUnit; // CFX pipeline
-def LSU_0 : FuncUnit; // LSU pipeline
-def FPU_0 : FuncUnit; // FPU pipeline
+def E500_SFX0 : FuncUnit; // Simple unit 0
+def E500_SFX1 : FuncUnit; // Simple unit 1
+def E500_BU : FuncUnit; // Branch unit
+def E500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E500_CFX_0 : FuncUnit; // CFX pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+def E500_FPU_0 : FuncUnit; // FPU pipeline
-def CR_Bypass : Bypass;
+def E500_GPR_Bypass : Bypass;
+def E500_FPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
def PPCE500mcItineraries : ProcessorItineraries<
- [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 1, 1], // Latency = 1 or 2
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<14, [CFX_DivBypass]>],
- [17, 1, 1], // Latency=4..35, Repeat= 4..35
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<8, [FPU_0]>],
- [11], // Latency = 8
- [FPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<8, [FPU_0]>],
- [11, 1, 1], // Latency = 8
- [NoBypass, NoBypass, NoBypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [7, 1, 1], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0]>],
- [5, 1], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1], // Latency = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1, 1], // Latency = 1
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [4, 1], // Latency = 1
- [CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1, 1], // Latency = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1, 1], // Latency = 4
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1, 1], // Latency = 4
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 1], // Latency = r+3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [6, 1, 1], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [6, 1], // Latency = 3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>]>,
- InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [5, 1], // Latency = 2, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0]>],
- [5, 1],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0], 0>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<5, [SFX0]>],
- [8, 1],
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1], // Latency = 4, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1], // Latency = 1, Repeat rate = 1
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [7, 1], // Latency = 4, Repeat rate = 4
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [4, 1], // Latency = 1, Repeat rate = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0]>],
- [4, 1],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [11, 1, 1], // Latency = 8, Repeat rate = 2
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [FPU_0]>],
- [13, 1, 1], // Latency = 10, Repeat rate = 4
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [11, 1, 1], // Latency = 8, Repeat rate = 2
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<68, [FPU_0]>],
- [71, 1, 1], // Latency = 68, Repeat rate = 68
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<38, [FPU_0]>],
- [41, 1, 1], // Latency = 38, Repeat rate = 38
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [FPU_0]>],
- [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<38, [FPU_0]>],
- [41, 1], // Latency = 38, Repeat rate = 38
- [FPR_Bypass, FPR_Bypass]>
+ [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass,
+ E500_CFX_0, E500_LSU_0, E500_FPU_0],
+ [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [E500_CR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0], 0>,
+ InstrStage<14, [E500_CFX_DivBypass]>],
+ [17, 1, 1], // Latency = 4..35, Repeat rate = 4..35
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11], // Latency = 8
+ [E500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass,
+ E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<3, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1],
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0, E500_SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [5, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_CR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<68, [E500_FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass,
+ E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass, E500_FPR_Bypass]>
]>;
// ===---------------------------------------------------------------------===//
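
A note on reading these itinerary entries (a minimal sketch, not part of the
diff; it assumes the FuncUnit/Bypass/InstrStage/InstrItinData classes from
llvm/Target/TargetItinerary.td and the IIC_* itinerary classes from
PPCSchedule.td are in scope, and all unit names below are hypothetical).
Observe also that in the e500mc file the first operand cycle consistently sits
3 above the latency quoted in the comments, apparently absorbing the front-end
stages:

def MY_DIS : FuncUnit;        // dispatch slot
def MY_SFX : FuncUnit;        // simple integer unit
def MY_LSU : FuncUnit;        // load/store unit
def MY_GPR_Bypass : Bypass;   // GPR forwarding network

def MyItineraries : ProcessorItineraries<
  [MY_DIS, MY_SFX, MY_LSU],             // functional units
  [MY_GPR_Bypass],                      // bypass networks
  [
  InstrItinData<IIC_LdStLoadUpd,
    [InstrStage<1, [MY_DIS], 0>,  // hold dispatch for 1 cycle; the trailing
                                  //   0 starts the next stage the same cycle
     InstrStage<1, [MY_SFX], 0>,  // SFX produces the updated base register
     InstrStage<1, [MY_LSU]>],    // LSU performs the memory access
    [6, 1, 1],                    // per-operand cycles; the def cycle of 6
                                  //   pairs with a "Latency = 3" comment
    [MY_GPR_Bypass,
     MY_GPR_Bypass, MY_GPR_Bypass], // result/uses reachable via forwarding
    2>                              // cracked into 2 micro-ops
]>;
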
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index 7a24d20323da..de097d9d8cf5 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -20,280 +20,344 @@
// * Decode & Dispatch
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-// def DIS0 : FuncUnit;
-// def DIS1 : FuncUnit;
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
// * Execute
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
// The CFX has a bypass path, allowing non-divide instructions to execute
// while a divide instruction is being executed.
-// def SFX0 : FuncUnit; // Simple unit 0
-// def SFX1 : FuncUnit; // Simple unit 1
-// def BU : FuncUnit; // Branch unit
-// def CFX_DivBypass
-// : FuncUnit; // CFX divide bypass path
-// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+def E5500_SFX0 : FuncUnit; // Simple unit 0
+def E5500_SFX1 : FuncUnit; // Simple unit 1
+def E5500_BU : FuncUnit; // Branch unit
+def E5500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0
-def CFX_1 : FuncUnit; // CFX pipeline stage 1
+def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1
-// def LSU_0 : FuncUnit; // LSU pipeline
-// def FPU_0 : FuncUnit; // FPU pipeline
+def E5500_LSU_0 : FuncUnit; // LSU pipeline
+def E5500_FPU_0 : FuncUnit; // FPU pipeline
-// def CR_Bypass : Bypass;
+def E5500_GPR_Bypass : Bypass;
+def E5500_FPR_Bypass : Bypass;
+def E5500_CR_Bypass : Bypass;
def PPCE5500Itineraries : ProcessorItineraries<
- [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
- LSU_0, FPU_0],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 1 or 2
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<26, [CFX_DivBypass]>],
- [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<16, [CFX_DivBypass]>],
- [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11], // Latency = 7, Repeat rate = 1
- [FPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<7, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 7
- [NoBypass, NoBypass, NoBypass]>,
- InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<2, [CFX_1]>],
- [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<1, [CFX_1]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<1, [CFX_1]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0], 0>,
- InstrStage<2, [CFX_1]>],
- [8, 2, 2], // Latency = 4 or 5, Repeat = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5, 2, 2], // Latency = 1, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0, SFX1]>],
- [6, 2, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [SFX0]>],
- [6, 2], // Latency = 2, Repeat rate = 2
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2], // Latency = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2, 2], // Latency = 1
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [BU]>],
- [5, 2], // Latency = 1
- [CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [5, 2, 2], // Latency = 1
- [CR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [8, 2, 2], // Latency = 4, Repeat rate = 1
- [FPR_Bypass, GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [GPR_Bypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [LSU_0]>],
- [8, 2], // Latency = r+3, Repeat rate = r+3
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<3, [LSU_0]>],
- [7, 2, 2], // Latency = 3, Repeat rate = 3
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass],
- 2>, // 2 micro-ops
- InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>],
- [7, 2], // Latency = 3, Repeat rate = 1
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [CFX_0]>],
- [6, 2], // Latency = 2, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [LSU_0], 0>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<5, [CFX_0]>],
- [9, 2], // Latency = 5, Repeat rate = 5
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [SFX0]>],
- [8, 2], // Latency = 4, Repeat rate = 4
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [CFX_0]>],
- [5], // Latency = 1, Repeat rate = 1
- [GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<4, [CFX_0]>],
- [8, 2], // Latency = 4, Repeat rate = 4
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [SFX0, SFX1]>],
- [5], // Latency = 1, Repeat rate = 1
- [GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2], // Latency = 7, Repeat rate = 1
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<31, [FPU_0]>],
- [39, 2, 2], // Latency = 35, Repeat rate = 31
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<16, [FPU_0]>],
- [24, 2, 2], // Latency = 20, Repeat rate = 16
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<1, [FPU_0]>],
- [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
- InstrStage<2, [FPU_0]>],
- [12, 2], // Latency = 8, Repeat rate = 2
- [FPR_Bypass, FPR_Bypass]>
+ [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU,
+ E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1,
+ E5500_LSU_0, E5500_FPU_0],
+ [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [E5500_CR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<26, [E5500_CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<16, [E5500_CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<7, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass,
+ E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_CR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<31, [E5500_FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<16, [E5500_FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass,
+ E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [E5500_FPR_Bypass, E5500_FPR_Bypass]>
]>;
// ===---------------------------------------------------------------------===//
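
The CFX divide-bypass comment near the top of this hunk is what the
IIC_IntDivD/IIC_IntDivW entries encode: the divide names CFX_0 for a single
cycle with a 0 time increment and then parks in the dedicated DivBypass unit,
so CFX_0/CFX_1 stay free for multiplies. A hedged sketch of just that pattern
(hypothetical names, same class assumptions as above; shown as a standalone
def for clarity, though in the real files such records appear inline in the
itinerary list):

def X_CFX_0         : FuncUnit;
def X_CFX_DivBypass : FuncUnit;

def XDivW : InstrItinData<IIC_IntDivW,
  [InstrStage<1, [X_CFX_0], 0>,         // occupy CFX_0 only at issue
   InstrStage<16, [X_CFX_DivBypass]>],  // the divide then ties up only the
  [20, 2, 2]>;                          //   bypass path for its 16 cycles
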
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 72a0a392631a..21efd8f8f6c9 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -11,61 +11,70 @@
//
//===----------------------------------------------------------------------===//
+def G3_BPU : FuncUnit; // Branch unit
+def G3_SLU : FuncUnit; // Store/load unit
+def G3_SRU : FuncUnit; // special register unit
+def G3_IU1 : FuncUnit; // integer unit 1 (simple)
+def G3_IU2 : FuncUnit; // integer unit 2 (complex)
+def G3_FPU1 : FuncUnit; // floating point unit 1
def G3Itineraries : ProcessorItineraries<
- [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
- InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
- InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+ [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G3_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G3_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_LdStDCBA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]>
]>;
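
All of these PPCScheduleXXX.td files are pulled into one TableGen scope
(PPCSchedule.td includes them), so two CPU models cannot both define a unit
literally named IU1; that is presumably why this change drops the shared,
commented-out unit defs and gives every model its own prefixed set. A minimal
sketch of the resulting convention (illustrative names):

def A_IU : FuncUnit;   // model A's integer unit
def B_IU : FuncUnit;   // model B's: a distinct record, so the two models
                       //   can disagree on cycle counts for the same class
def AItineraries : ProcessorItineraries<[A_IU], [],
  [InstrItinData<IIC_IntSimple, [InstrStage<1, [A_IU]>]>]>;
def BItineraries : ProcessorItineraries<[B_IU], [],
  [InstrItinData<IIC_IntSimple, [InstrStage<2, [B_IU]>]>]>;
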
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index fc9120dfa290..340773ef7876 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -11,71 +11,86 @@
//
//===----------------------------------------------------------------------===//
+def G4_BPU : FuncUnit; // Branch unit
+def G4_SLU : FuncUnit; // Store/load unit
+def G4_SRU : FuncUnit; // special register unit
+def G4_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4_FPU1 : FuncUnit; // floating point unit 1
+def G4_VPU : FuncUnit; // vector permutation unit
+def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4_VFPU : FuncUnit; // vector floating point unit
+
def G4Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
- InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
- InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
- InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
- InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
- InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
- InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+ [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1,
+ G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]>
]>;
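
For context on where a def like G4Itineraries is consumed: PPC.td binds each
itinerary to a CPU name through a Processor record. A hedged sketch only; the
feature list here is invented for illustration, see PPC.td for the real
definition:

def : Processor<"g4", G4Itineraries, [FeatureAltivec]>;
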
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index a4e82ce23e6f..1d9f13fcb850 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -11,78 +11,102 @@
//
//===----------------------------------------------------------------------===//
-def IU3 : FuncUnit; // integer unit 3 (7450 simple)
-def IU4 : FuncUnit; // integer unit 4 (7450 simple)
+def G4P_BPU : FuncUnit; // Branch unit
+def G4P_SLU : FuncUnit; // Store/load unit
+def G4P_SRU : FuncUnit; // special register unit
+def G4P_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4P_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4P_IU3 : FuncUnit; // integer unit 3 (simple)
+def G4P_IU4 : FuncUnit; // integer unit 4 (simple)
+def G4P_FPU1 : FuncUnit; // floating point unit 1
+def G4P_VPU : FuncUnit; // vector permutation unit
+def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4P_VFPU : FuncUnit; // vector floating point unit
def G4PlusItineraries : ProcessorItineraries<
- [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
- InstrItinData<IntSimple , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
- InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
- InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
- InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
- InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
- InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
- InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
- InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
- InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
- InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
- InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
- InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
- InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
- InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
- InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
- InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
- InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
- InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+ [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1,
+ G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]>
]>;
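
One reading note for the four-wide entries above: an InstrStage that lists
several functional units reserves any one free unit from the set, not all of
them, so InstrStage<1, [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4]> models a simple
op that can issue to whichever of the four IUs is available. A sketch under
the same assumptions, with hypothetical names:

def Y_IU1 : FuncUnit;
def Y_IU2 : FuncUnit;
def YSimple : InstrItinData<IIC_IntSimple,
  [InstrStage<1, [Y_IU1, Y_IU2]>]>;  // one cycle on either IU, not both
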
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index c64998d52a0c..a3b73ab4454f 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -11,90 +11,110 @@
//
//===----------------------------------------------------------------------===//
+def G5_BPU : FuncUnit; // Branch unit
+def G5_SLU : FuncUnit; // Store/load unit
+def G5_SRU : FuncUnit; // special register unit
+def G5_IU1 : FuncUnit; // integer unit 1 (simple)
+def G5_IU2 : FuncUnit; // integer unit 2 (complex)
+def G5_FPU1 : FuncUnit; // floating point unit 1
+def G5_FPU2 : FuncUnit; // floating point unit 2
+def G5_VPU : FuncUnit; // vector permutation unit
+def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G5_VFPU : FuncUnit; // vector floating point unit
+
def G5Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
- InstrItinData<IntSimple , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
- InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
- InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
- InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
- InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
- InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
- InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
- InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
- InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
- InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
- InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
- InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
- InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
- InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
- InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
- InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
- InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
- InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
- InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
- InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>,
- InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
- InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
- InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
- InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
- InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
- InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
- InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
- InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
- InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
- InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
- InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
- InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
- InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
- InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
- InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
- InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
- InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
- InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
- InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
- InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
- InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
- InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
- InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
- InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
- InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+ [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2,
+ G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G5_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<10, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<8, [G5_IU2]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<5, [G5_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<3, [G5_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<3, [G5_VPU]>]>
]>;
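(Annotation, not part of the patch: to read these itinerary entries, each InstrItinData pairs an itinerary class with the functional units it occupies and for how long. For example, IIC_IntDivD above ties up G5_IU1 for 68 cycles, while IIC_IntSimple can issue to either G5_IU1 or G5_IU2 and completes in 2.)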
// ===---------------------------------------------------------------------===//
-// e5500 machine model for scheduling and other instruction cost heuristics.
+// G5 machine model for scheduling and other instruction cost heuristics.
def G5Model : SchedMachineModel {
let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
new file mode 100644
index 000000000000..d3e426975ec0
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -0,0 +1,385 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P7 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P7_DU1 : FuncUnit;
+def P7_DU2 : FuncUnit;
+def P7_DU3 : FuncUnit;
+def P7_DU4 : FuncUnit;
+def P7_DU5 : FuncUnit;
+def P7_DU6 : FuncUnit;
+
+def P7_LS1 : FuncUnit; // Load/Store pipeline 1
+def P7_LS2 : FuncUnit; // Load/Store pipeline 2
+
+def P7_FX1 : FuncUnit; // FX pipeline 1
+def P7_FX2 : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P7_VS1 : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P7_VS2 : FuncUnit; // VS pipeline 2
+
+def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P7_BRU : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts, AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op) prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
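The dispatch-group rule in the notes above is easy to get wrong, so here is a minimal C++ sketch of it (annotation and illustration only, not part of the patch; InstKind, DispatchGroup, and formGroups are invented names):

#include <vector>

// Model of a P7 dispatch group per the notes above: at most four
// non-branch instructions and two branches, and a second branch
// must be the last instruction in the group.
enum class InstKind { NonBranch, Branch };

struct DispatchGroup {
  int NonBranches = 0;
  int Branches = 0;

  // Returns true if I fits in this group under the rules above.
  bool tryAdd(InstKind I) {
    if (Branches == 2)
      return false;                 // a second branch closed the group
    if (I == InstKind::Branch) {
      ++Branches;
      return true;                  // a first branch does not end the group
    }
    if (NonBranches == 4)
      return false;                 // non-branch slots exhausted
    ++NonBranches;
    return true;
  }
};

// Greedily pack an instruction stream into dispatch groups.
std::vector<DispatchGroup> formGroups(const std::vector<InstKind> &Insts) {
  std::vector<DispatchGroup> Groups(1);
  for (InstKind I : Insts) {
    if (!Groups.back().tryAdd(I)) {
      Groups.emplace_back();
      Groups.back().tryAdd(I);
    }
  }
  return Groups;
}
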
+def P7Itineraries : ProcessorItineraries<
+ [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
+ P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2,
+ P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ // FIXME: Add record-form itinerary data.
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<36, [P7_FX1, P7_FX2]>],
+ [36, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<68, [P7_FX1, P7_FX2]>],
+ [68, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_CRU]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1]>, // mtcr
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P7 machine model for scheduling and other instruction cost heuristics.
+
+def P7Model : SchedMachineModel {
+ let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle.
+ // Note that the dispatch bundle size is 6 (including
+ // branches), but the total internal issue bandwidth per
+ // cycle (from all queues) is 8.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = P7Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index d4258b4a0eb1..dc1674214769 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "powerpc-selectiondag-info"
#include "PPCTargetMachine.h"
using namespace llvm;
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
-}
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 341b69cdfb5f..b2e7f3b5f2ac 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class PPCTargetMachine;
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ explicit PPCSelectionDAGInfo(const DataLayout *DL);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 7231ab101a26..b51512d335fc 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -17,28 +17,72 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-using namespace llvm;
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit)
- : PPCGenSubtargetInfo(TT, CPU, FS)
- , IsPPC64(is64Bit)
- , TargetTriple(TT) {
+ std::string Ret;
+
+ // Most PPC* platforms are big endian; PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers; PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
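(Annotation, not part of the patch: working the function above by hand gives, for example,
  powerpc64le-unknown-linux (ELF):   "e-m:e-i64:64-n32:64"
  powerpc-unknown-linux (ELF, SVR4): "E-m:e-p:32:32-i64:64-n32"
  powerpc-apple-darwin (Mach-O):     "E-m:o-p:32:32-f64:32:64-n32"
assuming getManglingComponent yields "-m:e" for ELF and "-m:o" for Mach-O.)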
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM,
+ bool is64Bit, CodeGenOpt::Level OptLevel)
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ TLInfo(TM), TSInfo(&DL) {}
+
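The constructor above threads *this through initializeSubtargetDependencies() in its initializer list because members are constructed in declaration order, and FrameLowering must see fully parsed feature bits. A miniature self-contained C++ sketch of the same pattern (annotation only; all names invented):

struct MiniSubtarget {
  bool HasFeatureX = false;

  // Stand-in for initializeSubtargetDependencies(): parse features,
  // then return *this so it can feed a member's constructor.
  MiniSubtarget &init(const char *FS) {
    HasFeatureX = (FS && FS[0] == '+');
    return *this;
  }

  struct MiniFrameLowering {
    explicit MiniFrameLowering(const MiniSubtarget &ST)
        : UsesFeatureX(ST.HasFeatureX) {}
    bool UsesFeatureX;
  };

  // Declared after the feature bits, so by the time it is constructed
  // init() has already run and the parsed features are visible.
  MiniFrameLowering FrameLowering;

  explicit MiniSubtarget(const char *FS) : FrameLowering(init(FS)) {}
};

PPCSubtarget does exactly this with FrameLowering(initializeSubtargetDependencies(CPU, FS)).
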
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void PPCSubtarget::SetJITMode() {
@@ -73,8 +117,10 @@ void PPCSubtarget::initializeEnvironment() {
HasMFOCRF = false;
Has64BitSupport = false;
Use64BitRegs = false;
+ UseCRBits = false;
HasAltivec = false;
HasQPX = false;
+ HasVSX = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -124,6 +170,14 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit";
}
+ // At -O2 and above, track CR bits as individual registers.
+ if (OptLevel >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+
// Parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
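(Annotation, not part of the patch: for a 64-bit target where FullFS has just been set to "+64bit", the -O2 path above rewrites it to "+crbits,+64bit" before ParseSubtargetFeatures runs; at -O0 the string is left untouched, so CR bits stay off unless requested explicitly.)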
@@ -144,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine endianness.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+
+ // FIXME: For now, we disable VSX in little-endian mode until endian
+ // issues in those instructions can be addressed.
+ if (IsLittleEndian)
+ HasVSX = false;
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -163,23 +222,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
GV->hasCommonLinkage() || isDecl;
}
-bool PPCSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
- CriticalPathRCs.clear();
-
- if (isPPC64())
- CriticalPathRCs.push_back(&PPC::G8RCRegClass);
- else
- CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-
- return OptLevel >= CodeGenOpt::Default;
-}
-
-// Embedded cores need aggressive scheduling.
+// Embedded cores need aggressive scheduling (and some others also benefit).
static bool needsAggressiveScheduling(unsigned Directive) {
switch (Directive) {
default: return false;
@@ -187,6 +230,8 @@ static bool needsAggressiveScheduling(unsigned Directive) {
case PPC::DIR_A2:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
return true;
}
}
@@ -198,6 +243,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
return needsAggressiveScheduling(DarwinDirective);
}
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+ return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(isPPC64() ?
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ec8c82ad521c..a3cedafb5ef2 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,7 +14,13 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -50,6 +56,7 @@ namespace PPC {
DIR_PWR6,
DIR_PWR6X,
DIR_PWR7,
+ DIR_PWR8,
DIR_64
};
}
@@ -73,6 +80,7 @@ protected:
bool HasMFOCRF;
bool Has64BitSupport;
bool Use64BitRegs;
+ bool UseCRBits;
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
@@ -98,12 +106,23 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// OptLevel - What default optimization level we're emitting code for.
+ CodeGenOpt::Level OptLevel;
+
+ PPCFrameLowering FrameLowering;
+ const DataLayout DL;
+ PPCInstrInfo InstrInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit);
+ const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
+ CodeGenOpt::Level OptLevel);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -122,12 +141,23 @@ public:
///
unsigned getDarwinDirective() const { return DarwinDirective; }
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ PPCJITInfo *getJITInfo() { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
/// \brief Reset the features for the PowerPC target.
- virtual void resetSubtargetFeatures(const MachineFunction *MF);
+ void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -146,6 +176,10 @@ public:
/// has64BitSupport() returns true.
bool use64BitRegs() const { return Use64BitRegs; }
+ /// useCRBits - Return true if we should store and manipulate i1 values in
+ /// the individual condition register bits.
+ bool useCRBits() const { return UseCRBits; }
+
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
@@ -172,6 +206,7 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
@@ -184,26 +219,32 @@ public:
/// isDarwin - True if this is any darwin platform.
bool isDarwin() const { return TargetTriple.isMacOSX(); }
- /// isBGP - True if this is a BG/P platform.
- bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+ /// FIXME: Should use a command-line option.
+ bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
+ isLittleEndian(); }
- /// enablePostRAScheduler - True at 'More' optimization.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
+ bool enableEarlyIfConversion() const override { return hasISEL(); }
// Scheduling customization.
- bool enableMachineScheduler() const;
+ bool enableMachineScheduler() const override;
+ // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostMachineScheduler() const override;
+ AntiDepBreakMode getAntiDepBreakMode() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
- unsigned NumRegionInstrs) const;
- bool useAA() const;
+ unsigned NumRegionInstrs) const override;
+ bool useAA() const override;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index d6767d51f2cc..9563b9045c39 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -26,6 +26,10 @@ static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
+static cl::opt<bool>
+VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
+ cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -33,59 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
- // PPC is big endian
- std::string Ret = "E";
-
- // PPC64 has 64 bit pointers, PPC32 has 32 bit pointers.
- if (ST.isPPC64())
- Ret += "-p:64:64";
- else
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
- Ret += "-f64:64:64-i64:64:64";
- else
- Ret += "-f64:32:64";
-
- // Set support for 128 floats depending on the ABI.
- if (!ST.isPPC64() && ST.isSVR4ABI())
- Ret += "-f128:64:128";
-
- // Some ABIs support 128 bit vectors.
- if (ST.isPPC64() && ST.isSVR4ABI())
- Ret += "-v128:128:128";
-
- // PPC64 has 32 and 64 bit register, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64Bit),
- DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
- FrameLowering(Subtarget), JITInfo(*this, is64Bit),
- TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
-
- // The binutils for the BG/P are too old for CFI.
- if (Subtarget.isBGP())
- setMCUseCFI(false);
+ CodeGenOpt::Level OL, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
initAsmInfo();
}
@@ -129,11 +86,12 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
- virtual bool addPreISel();
- virtual bool addILPOpts();
- virtual bool addInstSelector();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
+ bool addPreISel() override;
+ bool addILPOpts() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // namespace
@@ -149,12 +107,8 @@ bool PPCPassConfig::addPreISel() {
}
bool PPCPassConfig::addILPOpts() {
- if (getPPCSubtarget().hasISEL()) {
- addPass(&EarlyIfConverterID);
- return true;
- }
-
- return false;
+ addPass(&EarlyIfConverterID);
+ return true;
}
bool PPCPassConfig::addInstSelector() {
@@ -166,10 +120,20 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
+ addPass(createPPCVSXCopyPass());
+ return false;
+}
+
+bool PPCPassConfig::addPreRegAlloc() {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ &PPCVSXFMAMutateID);
return false;
}
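(Annotation, not part of the patch: insertPass(Target, Pass) places Pass after Target, so by default the VSX FMA mutation runs after the machine scheduler, and with the new flag it runs just after register coalescing instead; presumably exercised as, e.g., llc -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early foo.ll.)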
bool PPCPassConfig::addPreSched2() {
+ addPass(createPPCVSXCopyCleanupPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 606ccb314126..4c7029ca7a36 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,11 +14,7 @@
#ifndef PPC_TARGETMACHINE_H
#define PPC_TARGETMACHINE_H
-#include "PPCFrameLowering.h"
-#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCJITInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,13 +25,6 @@ namespace llvm {
///
class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- PPCInstrInfo InstrInfo;
- PPCFrameLowering FrameLowering;
- PPCJITInfo JITInfo;
- PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -43,34 +32,38 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
- virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const PPCFrameLowering *getFrameLowering() const {
- return &FrameLowering;
+ const PPCInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
- virtual const PPCTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ const PPCFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
}
- virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
+ PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
}
- virtual const PPCRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
+ const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
}
- virtual const DataLayout *getDataLayout() const { return &DL; }
- virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData *getInstrItineraryData() const {
- return &InstrItins;
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &getSubtargetImpl()->getInstrItineraryData();
}
// Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) override;
/// \brief Register PPC analysis passes with a pass manager.
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ void addAnalysisPasses(PassManagerBase &PM) override;
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index ec1e606eee56..2903cc192aa8 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetObjectFile.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
using namespace llvm;
@@ -22,16 +22,9 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
InitializeELF(TM.Options.UseInitArray);
}
-const MCSection * PPC64LinuxTargetObjectFile::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
-
- const MCSection *DefaultSection =
- TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
-
- if (DefaultSection != ReadOnlySection)
- return DefaultSection;
-
+const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
// Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
// when we have a constant that contains global relocations. This is
// necessary because of this ABI's handling of pointers to functions in
@@ -46,14 +39,17 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// linker, so we must use DataRelROSection instead of ReadOnlySection.
// For more information, see the description of ELIMINATE_COPY_RELOCS in
// GNU ld.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (Kind.isReadOnly()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (GVar && GVar->isConstant() &&
- (GVar->getInitializer()->getRelocationInfo() ==
- Constant::GlobalRelocations))
- return DataRelROSection;
+ if (GVar && GVar->isConstant() &&
+ (GVar->getInitializer()->getRelocationInfo() ==
+ Constant::GlobalRelocations))
+ Kind = SectionKind::getReadOnlyWithRel();
+ }
- return DefaultSection;
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+ Mang, TM);
}
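A concrete trigger for the path above (annotation and illustration only, not from the patch): a constant global whose initializer contains a function address needs a dynamic relocation, so it cannot live in a truly read-only page:

void callback();
// handler is constant, but its initializer needs a relocation against
// callback, so under the PPC64 SVR4 ABI it lands in .data.rel.ro
// rather than .rodata.
void (*const handler)() = callback;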
const MCExpr *PPC64LinuxTargetObjectFile::
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 262c52213d29..3e71bbc67379 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -20,14 +20,14 @@ namespace llvm {
/// 64-bit PowerPC Linux.
class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
- virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- virtual const MCSection *
- SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const;
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
/// \brief Describe a TLS variable address within debug info.
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const;
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index e876be16a9b3..73fb69101353 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -15,8 +15,12 @@
namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
+ PPCTargetStreamer(MCStreamer &S);
virtual ~PPCTargetStreamer();
virtual void emitTCEntry(const MCSymbol &S) = 0;
+ virtual void emitMachine(StringRef CPU) = 0;
+ virtual void emitAbiVersion(int AbiVersion) = 0;
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
};
}
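A plausible lowering of the three new hooks in an assembly-printing subclass (annotation and sketch only; the class name, stream member, and string-based signatures are invented stand-ins for LLVM's real MC types):

#include <ostream>
#include <string>

class MyPPCAsmStreamer {
  std::ostream &OS;  // invented stand-in for the streamer's output
public:
  explicit MyPPCAsmStreamer(std::ostream &OS) : OS(OS) {}
  // Each hook maps to one PowerPC assembler directive.
  void emitMachine(const std::string &CPU) {
    OS << "\t.machine " << CPU << '\n';
  }
  void emitAbiVersion(int AbiVersion) {
    OS << "\t.abiversion " << AbiVersion << '\n';
  }
  void emitLocalEntry(const std::string &Sym, const std::string &LocalOffset) {
    OS << "\t.localentry\t" << Sym << ", " << LocalOffset << '\n';
  }
};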
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 8879630270e2..007901b23e0c 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -14,17 +14,22 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppctti"
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "ppctti"
+
+static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
+    cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializePPCTTIPass(PassRegistry &);
@@ -32,35 +37,26 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
-class PPCTTI : public ImmutablePass, public TargetTransformInfo {
- const PPCTargetMachine *TM;
+class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
public:
- PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
TLI(TM->getTargetLowering()) {
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() {
+ virtual void initializePass() override {
pushTTIStack(this);
}
- virtual void finalizePass() {
- popTTIStack();
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -68,7 +64,7 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ virtual void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -76,31 +72,40 @@ public:
/// \name Scalar TTI Implementations
/// @{
- virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+
+ virtual PopcntSupportKind
+ getPopcntSupport(unsigned TyWidth) const override;
+ virtual void getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
- virtual unsigned getNumberOfRegisters(bool Vector) const;
- virtual unsigned getRegisterBitWidth(bool Vector) const;
- virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getNumberOfRegisters(bool Vector) const override;
+ virtual unsigned getRegisterBitWidth(bool Vector) const override;
+ virtual unsigned getMaximumUnrollFactor() const override;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind,
- OperandValueKind) const;
+ OperandValueKind) const override;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const;
+ int Index, Type *SubTp) const override;
virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
+ Type *Src) const override;
virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
+ Type *CondTy) const override;
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
+ unsigned Index) const override;
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace) const override;
/// @}
};
@@ -130,6 +135,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software;
}
+unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ if (Imm == 0)
+ return TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Basic;
+
+ if (isInt<32>(Imm.getSExtValue())) {
+ // A constant that can be materialized using lis.
+ if ((Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+ }
+ }
+
+ return 4 * TCC_Basic;
+}
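(Annotation, worked by hand from the function above: Imm == 0 is TCC_Free; Imm = 1000 fits isInt<16> and costs TCC_Basic (a single li/addi-class instruction); Imm = 0x70000 has its low 16 bits clear and costs TCC_Basic (a single lis); Imm = 0x12345678 costs 2 * TCC_Basic (lis + ori); anything not representable as a signed 32-bit value falls through to 4 * TCC_Basic.)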
+
+unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ switch (IID) {
+ default: return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+ break;
+ }
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
+unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ unsigned ImmIdx = ~0U;
+ bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
+ ZeroFree = false;
+ switch (Opcode) {
+ default: return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::And:
+ RunFree = true; // (for the rotate-and-mask instructions)
+ // Fallthrough...
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ ShiftedFree = true;
+ // Fallthrough...
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ ImmIdx = 1;
+ break;
+ case Instruction::ICmp:
+ UnsignedFree = true;
+ ImmIdx = 1;
+ // Fallthrough... (zero comparisons can use record-form instructions)
+ case Instruction::Select:
+ ZeroFree = true;
+ break;
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ }
+
+ if (ZeroFree && Imm == 0)
+ return TCC_Free;
+
+ if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+
+ if (RunFree) {
+ if (Imm.getBitWidth() <= 32 &&
+ (isShiftedMask_32(Imm.getZExtValue()) ||
+ isShiftedMask_32(~Imm.getZExtValue())))
+ return TCC_Free;
+
+ if (ST->isPPC64() &&
+ (isShiftedMask_64(Imm.getZExtValue()) ||
+ isShiftedMask_64(~Imm.getZExtValue())))
+ return TCC_Free;
+ }
+
+ if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
+ return TCC_Free;
+
+ if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Free;
+ }
+
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
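(Annotation, worked by hand: an 'and' with 0x0FF0 is TCC_Free because that is a shifted mask handled by the rotate-and-mask instructions; an unsigned compare against any 16-bit unsigned immediate is free; an 'or' with 0xABCD0000, whose low 16 bits are zero, is free via the shifted-immediate form; and a compare or select against zero is free because zero comparisons can use record-form instructions.)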
+
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
@@ -141,7 +282,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
- return 32;
+ return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
@@ -210,11 +351,21 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ }
+
// Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
// raised further if other unprofitable cases remain.
- unsigned LHSPenalty = 12;
+ unsigned LHSPenalty = 2;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ LHSPenalty += 7;
// Vector element insert/extract with Altivec is very expensive,
// because they require store and reload with the attendant
@@ -235,14 +386,34 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- // Each load/store unit costs 1.
- unsigned Cost = LT.first * 1;
+ unsigned Cost =
+ TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+
+ // VSX loads/stores support unaligned access.
+ if (ST->hasVSX()) {
+ if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
+ return Cost;
+ }
+
+ bool UnalignedAltivec =
+ Src->isVectorTy() &&
+ Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
+ LT.second.getSizeInBits() == 128 &&
+ Opcode == Instruction::Load;
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
unsigned SrcBytes = LT.second.getStoreSize();
- if (SrcBytes && Alignment && Alignment < SrcBytes)
- Cost *= (SrcBytes/Alignment);
+ if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
+ Cost += LT.first*(SrcBytes/Alignment-1);
+
+ // For a vector type, there is also scalarization overhead (only for
+ // stores, loads are expanded using the vector-load + permutation sequence,
+ // which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+ }
return Cost;
}
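(Annotation, worked instance of the cost model above: storing a v4i32 (SrcBytes = 16) at 4-byte alignment without VSX adds LT.first * (16/4 - 1) = 3 to the base cost, plus one ExtractElement per lane for scalarization; the same vector loaded rather than stored adds nothing, since 128-bit Altivec loads are expanded as vector-load + permutation and qualify as UnalignedAltivec; and with VSX, v2f64/v2i64 accesses return the base cost regardless of alignment.)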
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index fdb8a62b9d24..c9548c7fe0cd 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-
-add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index f77d85b15ab9..410234686400 100644
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCInfo
parent = PowerPC
-required_libraries = MC Support Target
+required_libraries = Support
add_to_library_groups = PowerPC