path: root/llvm/lib/Target/PowerPC
author     Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/PowerPC
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--  llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 83
-rw-r--r--  llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 76
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 79
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 23
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 112
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h | 54
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 7
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 75
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h | 13
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 11
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 2
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 124
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h | 14
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 63
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 26
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 84
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 3
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 380
-rw-r--r--  llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp | 49
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.h | 37
-rw-r--r--  llvm/lib/Target/PowerPC/PPC.td | 95
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 593
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp | 5
-rw-r--r--  llvm/lib/Target/PowerPC/PPCBranchSelector.cpp | 35
-rw-r--r--  llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCCallingConv.td | 44
-rw-r--r--  llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 6
-rw-r--r--  llvm/lib/Target/PowerPC/PPCExpandISEL.cpp | 57
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFastISel.cpp | 91
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 942
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFrameLowering.h | 21
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 510
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2855
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 181
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 132
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 59
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrFormats.td | 6
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrHTM.td | 7
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 1397
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h | 185
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 495
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 1035
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrQPX.td | 31
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrSPE.td | 16
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td | 5786
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 12
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp | 34
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 78
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 47
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 23
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 41
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp | 52
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMachineScheduler.h | 3
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMacroFusion.cpp | 203
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMacroFusion.def | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCMacroFusion.h | 22
-rw-r--r--  llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 10
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 288
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 22
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 20
-rw-r--r--  llvm/lib/Target/PowerPC/PPCScheduleP9.td | 10
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 38
-rw-r--r--  llvm/lib/Target/PowerPC/PPCSubtarget.h | 95
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 21
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 167
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 44
70 files changed, 10767 insertions, 6418 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 7e7902c27a810..13fd7d05ab9f4 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -109,6 +109,8 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchRegisterName(unsigned &RegNo, int64_t &IntVal);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
@@ -356,6 +358,16 @@ public:
bool isS16ImmX16() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 15) == 0); }
+ bool isS34ImmX16() const {
+ return Kind == Expression ||
+ (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0);
+ }
+ bool isS34Imm() const {
+ // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
+ // ContextImmediate is needed.
+ return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()));
+ }
+
bool isS17Imm() const {
switch (Kind) {
case Expression:
@@ -388,6 +400,7 @@ public:
bool isCondBr() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
+ bool isImmZero() const { return Kind == Immediate && getImm() == 0; }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isVSRegNumber() const {
return Kind == Immediate && isUInt<6>(getImm());
@@ -1142,7 +1155,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -1210,14 +1223,22 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
+ return TokError("invalid register name");
+ return false;
+}
+
+OperandMatchResultTy PPCAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
int64_t IntVal;
if (MatchRegisterName(RegNo, IntVal))
- return TokError("invalid register name");
- return false;
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
}
/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
@@ -1380,7 +1401,7 @@ ParseExpression(const MCExpr *&EVal) {
PPCMCExpr::VariantKind Variant;
const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
if (E)
- EVal = PPCMCExpr::create(Variant, E, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, E, getParser().getContext());
return false;
}
@@ -1427,7 +1448,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
if (getLexer().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
Parser.Lex(); // Eat the ')'
- EVal = PPCMCExpr::create(Variant, EVal, false, getParser().getContext());
+ EVal = PPCMCExpr::create(Variant, EVal, getParser().getContext());
}
return false;
}
@@ -1560,12 +1581,12 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// instruction name, to match what TableGen is doing.
std::string NewOpcode;
if (parseOptionalToken(AsmToken::Plus)) {
- NewOpcode = Name;
+ NewOpcode = std::string(Name);
NewOpcode += '+';
Name = NewOpcode;
}
if (parseOptionalToken(AsmToken::Minus)) {
- NewOpcode = Name;
+ NewOpcode = std::string(Name);
NewOpcode += '-';
Name = NewOpcode;
}
@@ -1658,9 +1679,9 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) {
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
return Error(ExprLoc, "literal value out of range for '" +
ID.getIdentifier() + "' directive");
- getStreamer().EmitIntValue(IntValue, Size);
+ getStreamer().emitIntValue(IntValue, Size);
} else
- getStreamer().EmitValue(Value, Size, ExprLoc);
+ getStreamer().emitValue(Value, Size, ExprLoc);
return false;
};
@@ -1681,7 +1702,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) {
return addErrorSuffix(" in '.tc' directive");
// Align to word size.
- getParser().getStreamer().EmitValueToAlignment(Size);
+ getParser().getStreamer().emitValueToAlignment(Size);
// Emit expressions.
return ParseDirectiveWord(Size, ID);
@@ -1710,10 +1731,10 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
if (parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.machine' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitMachine(CPU);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitMachine(CPU);
return false;
}
@@ -1752,10 +1773,10 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.abiversion' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitAbiVersion(AbiVersion);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitAbiVersion(AbiVersion);
return false;
}
@@ -1775,10 +1796,10 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
parseToken(AsmToken::EndOfStatement))
return addErrorSuffix(" in '.localentry' directive");
- PPCTargetStreamer &TStreamer =
- *static_cast<PPCTargetStreamer *>(
- getParser().getStreamer().getTargetStreamer());
- TStreamer.emitLocalEntry(Sym, Expr);
+ PPCTargetStreamer *TStreamer = static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ if (TStreamer != nullptr)
+ TStreamer->emitLocalEntry(Sym, Expr);
return false;
}
@@ -1830,23 +1851,23 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E,
MCContext &Ctx) {
switch (Variant) {
case MCSymbolRefExpr::VK_PPC_LO:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_LO, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HI:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HI, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGH:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGH, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGH, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHER:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHER, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHERA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHERA, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHEST:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHEST, E, Ctx);
case MCSymbolRefExpr::VK_PPC_HIGHESTA:
- return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ return PPCMCExpr::create(PPCMCExpr::VK_PPC_HIGHESTA, E, Ctx);
default:
return nullptr;
}
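
For reference, a minimal standalone sketch (not part of this patch) of the new s34imm operand predicates added above, written in plain C++ without LLVM's isInt<N> helper:

#include <cstdint>

// True iff V fits in a signed 34-bit immediate.
static bool isInt34(int64_t V) {
  return V >= -(INT64_C(1) << 33) && V < (INT64_C(1) << 33);
}

// isS34Imm accepts any signed 34-bit value; isS34ImmX16 additionally
// requires the low four bits to be zero (a multiple of 16).
static bool isS34Imm(int64_t Imm) { return isInt34(Imm); }
static bool isS34ImmX16(int64_t Imm) { return isInt34(Imm) && (Imm & 15) == 0; }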
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index e3c0f958c7ed8..74c6fd3733f03 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -60,9 +60,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
createPPCLEDisassembler);
}
-static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus decodeCondBrTarget(MCInst &Inst, unsigned Imm,
+ uint64_t /*Address*/,
+ const void * /*Decoder*/) {
+ Inst.addOperand(MCOperand::createImm(SignExtend32<14>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeDirectBrTarget(MCInst &Inst, unsigned Imm,
+ uint64_t /*Address*/,
+ const void * /*Decoder*/) {
int32_t Offset = SignExtend32<24>(Imm);
Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
@@ -191,6 +198,14 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ if (Imm != 0)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the memri field (imm, reg), which has the low 16-bits as the
@@ -262,6 +277,35 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ // Decode the memri34_pcrel field (imm, reg), which has the low 34-bits as the
+ // displacement, and the next 5 bits as an immediate 0.
+ uint64_t Base = Imm >> 34;
+ uint64_t Disp = Imm & 0x3FFFFFFFFUL;
+
+ assert(Base < 32 && "Invalid base register");
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
+ return decodeImmZeroOperand(Inst, Base, Address, Decoder);
+}
+
+static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const void *Decoder) {
+ // Decode the memri34 field (imm, reg), which has the low 34-bits as the
+ // displacement, and the next 5 bits as the register #.
+ uint64_t Base = Imm >> 34;
+ uint64_t Disp = Imm & 0x3FFFFFFFFUL;
+
+ assert(Base < 32 && "Invalid base register");
+
+ Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
+ Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
// Decode the spe8disp field (imm, reg), which has the low 5-bits as the
@@ -324,6 +368,29 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
+ auto *ReadFunc = IsLittleEndian ? support::endian::read32le
+ : support::endian::read32be;
+
+ // If this is an 8-byte prefixed instruction, handle it here.
+ // Note: prefixed instructions aren't technically 8-byte entities - the prefix
+ // appears in memory at an address 4 bytes prior to that of the base
+ // instruction regardless of endianness. So we read the two pieces and
+ // rebuild the 8-byte instruction.
+ // TODO: In this function we call decodeInstruction several times with
+ // different decoder tables. It may be possible to only call once by
+ // looking at the top 6 bits of the instruction.
+ if (STI.getFeatureBits()[PPC::FeaturePrefixInstrs] && Bytes.size() >= 8) {
+ uint32_t Prefix = ReadFunc(Bytes.data());
+ uint32_t BaseInst = ReadFunc(Bytes.data() + 4);
+ uint64_t Inst = BaseInst | (uint64_t)Prefix << 32;
+ DecodeStatus result = decodeInstruction(DecoderTable64, MI, Inst, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 8;
+ return result;
+ }
+ }
+
// Get the four bytes of the instruction.
Size = 4;
if (Bytes.size() < 4) {
@@ -332,8 +399,7 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// Read the instruction in the proper endianness.
- uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data())
- : support::endian::read32be(Bytes.data());
+ uint64_t Inst = ReadFunc(Bytes.data());
if (STI.getFeatureBits()[PPC::FeatureQPX]) {
DecodeStatus result =
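
The prefixed-instruction path above reads the prefix and the base instruction as two separate 32-bit words because the prefix always sits 4 bytes before the base instruction in memory, regardless of endianness. A standalone sketch of that rebuild (not LLVM code; raw bytes in, 64-bit instruction out):

#include <cstdint>

static uint32_t read32le(const uint8_t *P) {
  return (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
         ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
}
static uint32_t read32be(const uint8_t *P) {
  return ((uint32_t)P[0] << 24) | ((uint32_t)P[1] << 16) |
         ((uint32_t)P[2] << 8) | (uint32_t)P[3];
}

// Rebuild the 8-byte prefixed instruction with the prefix in the high word.
static uint64_t readPrefixedInst(const uint8_t *Bytes, bool IsLittleEndian) {
  uint32_t Prefix = IsLittleEndian ? read32le(Bytes) : read32be(Bytes);
  uint32_t Base = IsLittleEndian ? read32le(Bytes + 4) : read32be(Bytes + 4);
  return ((uint64_t)Prefix << 32) | Base;
}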
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 8778e916f7e4d..dbaf221db9fc9 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -28,7 +28,6 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -40,11 +39,14 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
return Value & 0x3fffffc;
case PPC::fixup_ppc_half16:
return Value & 0xffff;
case PPC::fixup_ppc_half16ds:
return Value & 0xfffc;
+ case PPC::fixup_ppc_pcrel34:
+ return Value & 0x3ffffffff;
}
}
@@ -52,8 +54,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
- case FK_NONE:
- return 0;
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -65,7 +65,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case PPC::fixup_ppc_brcond14abs:
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
return 4;
+ case PPC::fixup_ppc_pcrel34:
case FK_Data_8:
return 8;
case PPC::fixup_ppc_nofixup:
@@ -91,24 +93,33 @@ public:
const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24_notoc", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 6, 24, 0 },
{ "fixup_ppc_brcond14abs", 16, 14, 0 },
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 0, 14, 0 },
+ { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24_notoc", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_br24abs", 2, 24, 0 },
{ "fixup_ppc_brcond14abs", 2, 14, 0 },
{ "fixup_ppc_half16", 0, 16, 0 },
{ "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_pcrel34", 0, 34, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
+ // Fixup kinds from .reloc directive are like R_PPC_NONE/R_PPC64_NONE. They
+ // do not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -123,11 +134,14 @@ public:
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override {
- Value = adjustFixupValue(Fixup.getKind(), Value);
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return;
+ Value = adjustFixupValue(Kind, Value);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ unsigned NumBytes = getFixupKindNumBytes(Kind);
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the appropriate
@@ -140,13 +154,13 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
- switch ((unsigned)Fixup.getKind()) {
+ MCFixupKind Kind = Fixup.getKind();
+ switch ((unsigned)Kind) {
default:
- return false;
- case FK_NONE:
- return true;
+ return Kind >= FirstLiteralRelocationKind;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
// If the target symbol has a local entry point we must not attempt
// to resolve the fixup directly. Emit a relocation and leave
// resolution of the final target address to the linker.
@@ -178,8 +192,8 @@ public:
llvm_unreachable("relaxInstruction() unimplemented");
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
@@ -200,21 +214,6 @@ public:
// FIXME: This should be in a separate file.
namespace {
-class DarwinPPCAsmBackend : public PPCAsmBackend {
-public:
- DarwinPPCAsmBackend(const Target &T, const Triple &TT)
- : PPCAsmBackend(T, TT) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool Is64 = TT.isPPC64();
- return createPPCMachObjectWriter(
- /*Is64Bit=*/Is64,
- (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
- MachO::CPU_SUBTYPE_POWERPC_ALL);
- }
-};
-
class ELFPPCAsmBackend : public PPCAsmBackend {
public:
ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
@@ -243,14 +242,25 @@ public:
} // end anonymous namespace
Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
- if (TT.isPPC64()) {
- if (Name == "R_PPC64_NONE")
- return FK_NONE;
- } else {
- if (Name == "R_PPC_NONE")
- return FK_NONE;
+ if (TT.isOSBinFormatELF()) {
+ unsigned Type;
+ if (TT.isPPC64()) {
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ } else {
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def"
+#undef ELF_RELOC
+ .Default(-1u);
+ }
+ if (Type != -1u)
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
}
- return MCAsmBackend::getFixupKind(Name);
+ return None;
}
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
@@ -258,9 +268,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
- if (TT.isOSDarwin())
- return new DarwinPPCAsmBackend(T, TT);
-
if (TT.isOSBinFormatXCOFF())
return new XCOFFPPCAsmBackend(T, TT);
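
getFixupKind above now recognizes any relocation name from the generated ELFRelocs .def files and returns it as a literal relocation kind. A minimal sketch of the same name-to-number lookup, assuming a small hand-written table in place of the .def include (the relocation numbers shown are illustrative values from the 64-bit ELF ABI):

#include <cstring>

static int lookupPPC64RelocType(const char *Name) {
  static const struct { const char *Name; int Type; } Table[] = {
      {"R_PPC64_NONE", 0},
      {"R_PPC64_REL24_NOTOC", 116},
      {"R_PPC64_PCREL34", 132},
  };
  for (const auto &E : Table)
    if (std::strcmp(Name, E.Name) == 0)
      return E.Type;
  return -1; // unknown name: caller falls back to no fixup kind
}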
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 20f752c3041ad..d8b3301e97f12 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -73,6 +73,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
@@ -83,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
+ case PPC::fixup_ppc_br24_notoc:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
@@ -94,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_LOCAL:
Type = ELF::R_PPC_LOCAL24PC;
break;
+ case MCSymbolRefExpr::VK_PPC_NOTOC:
+ Type = ELF::R_PPC64_REL24_NOTOC;
+ break;
}
break;
case PPC::fixup_ppc_brcond14:
@@ -121,6 +128,18 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
Target.print(errs());
errs() << '\n';
report_fatal_error("Invalid PC-relative half16ds relocation");
+ case PPC::fixup_ppc_pcrel34:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported Modifier for fixup_ppc_pcrel34");
+ case MCSymbolRefExpr::VK_PCREL:
+ Type = ELF::R_PPC64_PCREL34;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_PCREL:
+ Type = ELF::R_PPC64_GOT_PCREL34;
+ break;
+ }
+ break;
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -133,9 +152,6 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
} else {
switch (Fixup.getTargetKind()) {
default: llvm_unreachable("invalid fixup kind!");
- case FK_NONE:
- Type = ELF::R_PPC_NONE;
- break;
case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_ADDR24;
break;
@@ -431,6 +447,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return false;
case ELF::R_PPC_REL24:
+ case ELF::R_PPC64_REL24_NOTOC:
// If the target symbol has a local entry point, we must keep the
// target symbol to preserve that information for the linker.
// The "other" values are stored in the last 6 bits of the second byte.
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
new file mode 100644
index 0000000000000..4373778cc96cc
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -0,0 +1,112 @@
+//===-------- PPCELFStreamer.cpp - ELF Object Output ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer for PowerPC.
+//
+// The purpose of the custom ELF streamer is to allow us to intercept
+// instructions as they are being emitted and align all 8 byte instructions
+// to a 64 byte boundary if required (by adding a 4 byte nop). This is important
+// because 8 byte instructions are not allowed to cross 64 byte boundaries
+// and by aliging anything that is within 4 bytes of the boundary we can
+// guarantee that the 8 byte instructions do not cross that boundary.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "PPCELFStreamer.h"
+#include "PPCInstrInfo.h"
+#include "PPCMCCodeEmitter.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+
+PPCELFStreamer::PPCELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter)), LastLabel(NULL) {
+}
+
+void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ // Prefixed instructions must not cross a 64-byte boundary (i.e. prefix is
+ // before the boundary and the remaining 4 bytes are after the boundary). In
+ // order to achieve this, a nop is added prior to any such boundary-crossing
+ // prefixed instruction. Align to 64 bytes if possible but add a maximum of 4
+ // bytes when trying to do that. If alignment requires adding more than 4
+ // bytes then the instruction won't be aligned. When emitting a code alignment
+ // a new fragment is created for this alignment. This fragment will contain
+ // all of the nops required as part of the alignment operation. In the cases
+ // when no nops are added, the fragment is still created but it remains
+ // empty.
+ emitCodeAlignment(64, 4);
+
+ // Emit the instruction.
+ // Since the previous emit created a new fragment then adding this instruction
+ // also forces the addition of a new fragment. Inst is now the first
+ // instruction in that new fragment.
+ MCELFStreamer::emitInstruction(Inst, STI);
+
+ // The above instruction is forced to start a new fragment because it
+ // comes after a code alignment fragment. Get that new fragment.
+ MCFragment *InstructionFragment = getCurrentFragment();
+ SMLoc InstLoc = Inst.getLoc();
+ // Check if there was a last label emitted.
+ if (LastLabel && !LastLabel->isUnset() && LastLabelLoc.isValid() &&
+ InstLoc.isValid()) {
+ const SourceMgr *SourceManager = getContext().getSourceManager();
+ unsigned InstLine = SourceManager->FindLineNumber(InstLoc);
+ unsigned LabelLine = SourceManager->FindLineNumber(LastLabelLoc);
+ // If the Label and the Instruction are on the same line then move the
+ // label to the top of the fragment containing the aligned instruction that
+ // was just added.
+ if (InstLine == LabelLine) {
+ AssignFragment(LastLabel, InstructionFragment);
+ LastLabel->setOffset(0);
+ }
+ }
+}
+
+void PPCELFStreamer::emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ PPCMCCodeEmitter *Emitter =
+ static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
+
+ // Special handling is only for prefixed instructions.
+ if (!Emitter->isPrefixedInstruction(Inst)) {
+ MCELFStreamer::emitInstruction(Inst, STI);
+ return;
+ }
+ emitPrefixedInstruction(Inst, STI);
+}
+
+void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ LastLabel = Symbol;
+ LastLabelLoc = Loc;
+ MCELFStreamer::emitLabel(Symbol);
+}
+
+MCELFStreamer *llvm::createPPCELFStreamer(
+ MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter) {
+ return new PPCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter));
+}
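
The rule the streamer enforces can be checked in isolation: an 8-byte prefixed instruction must not straddle a 64-byte boundary, so anything starting in the last 4 bytes of a 64-byte block needs a nop first. A standalone sketch of that check (not part of the patch):

#include <cassert>
#include <cstdint>

// True iff an 8-byte prefixed instruction starting at Offset would have its
// prefix and base instruction in different 64-byte blocks.
static bool crossesBoundary(uint64_t Offset) {
  return (Offset / 64) != ((Offset + 7) / 64);
}

int main() {
  assert(!crossesBoundary(0));  // fits entirely in the first block
  assert(!crossesBoundary(56)); // bytes 56..63, still one block
  assert(crossesBoundary(60));  // prefix at 60..63, base at 64..67
  return 0;
}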
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
new file mode 100644
index 0000000000000..51863232d0719
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -0,0 +1,54 @@
+//===- PPCELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
+#define LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include <memory>
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+
+class PPCELFStreamer : public MCELFStreamer {
+ // We need to keep track of the last label we emitted (only one) because
+ // depending on whether the label is on the same line as an aligned
+ // instruction or not, the label may refer to the instruction or the nop.
+ MCSymbol *LastLabel;
+ SMLoc LastLabelLoc;
+
+public:
+ PPCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter);
+
+ void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+
+ // EmitLabel updates LastLabel and LastLabelLoc when a new label is emitted.
+ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+private:
+ void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+};
+
+MCELFStreamer *createPPCELFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter);
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_PPC_MCELFSTREAMER_PPCELFSTREAMER_H
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 845489788c86f..2fb8947fd4e0f 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -19,6 +19,10 @@ enum Fixups {
// 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
fixup_ppc_br24 = FirstTargetFixupKind,
+ // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where
+ // the caller does not use the TOC.
+ fixup_ppc_br24_notoc,
+
/// 14-bit PC relative relocation for conditional branches.
fixup_ppc_brcond14,
@@ -36,6 +40,9 @@ enum Fixups {
/// instrs like 'std'.
fixup_ppc_half16ds,
+ // A 34-bit fixup corresponding to PC-relative paddi.
+ fixup_ppc_pcrel34,
+
/// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
/// TLS general and local dynamic models, or inserts the thread-pointer
/// register number.
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 9cc1c539e24a9..16da62a74b8cd 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -116,16 +116,6 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
}
- if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- O << "\tmr ";
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 1, O);
- printAnnotation(O, Annot);
- return;
- }
-
if (MI->getOpcode() == PPC::RLDICR ||
MI->getOpcode() == PPC::RLDICR_32) {
unsigned char SH = MI->getOperand(2).getImm();
@@ -193,7 +183,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
}
- if (!printAliasInstr(MI, O))
+ if (!printAliasInstr(MI, Address, O))
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}
@@ -339,6 +329,13 @@ void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
O << (int)Value;
}
+void PPCInstPrinter::printImmZeroOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value == 0 && "Operand must be zero");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -391,6 +388,17 @@ void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
}
+void PPCInstPrinter::printS34ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ long long Value = MI->getOperand(OpNo).getImm();
+ assert(isInt<34>(Value) && "Invalid s34imm argument!");
+ O << (long long)Value;
+ }
+ else
+ printOperand(MI, OpNo, O);
+}
+
void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
@@ -399,18 +407,29 @@ void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
}
-void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, raw_ostream &O) {
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, O);
-
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print .+8, an eight byte displacement from the PC.
- O << ".";
int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
- if (Imm >= 0)
- O << "+";
- O << Imm;
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + Imm;
+ if (!TT.isPPC64())
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print, for example `.+8` (for ELF) or `$+8` (for AIX)
+ // to express an eight byte displacement from the program counter.
+ if (!TT.isOSAIX())
+ O << ".";
+ else
+ O << "$";
+
+ if (Imm >= 0)
+ O << "+";
+ O << Imm;
+ }
}
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
@@ -451,6 +470,22 @@ void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
O << ')';
}
+void PPCInstPrinter::printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, O);
+ O << '(';
+ printImmZeroOperand(MI, OpNo + 1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImm34(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS34ImmOperand(MI, OpNo, O);
+ O << '(';
+ printOperand(MI, OpNo + 1, O);
+ O << ')';
+}
+
void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
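
printBranchOperand above now prints either a resolved target address or a PC-relative form depending on PrintBranchImmAsAddress. A standalone sketch of the same logic (plain C++, printf standing in for raw_ostream):

#include <cstdint>
#include <cstdio>

static void printBranchTarget(uint64_t Address, uint32_t EncodedImm,
                              bool PrintAsAddress, bool IsPPC64, bool IsAIX) {
  // The encoded operand is a word offset; shift by 2 for a byte offset.
  int32_t Imm = (int32_t)(EncodedImm << 2);
  if (PrintAsAddress) {
    uint64_t Target = Address + (int64_t)Imm;
    if (!IsPPC64)
      Target &= 0xffffffff; // 32-bit targets wrap at 4 GiB
    printf("0x%llx", (unsigned long long)Target);
  } else {
    // ELF uses `.` for the current location, AIX uses `$`.
    printf("%s%+d", IsAIX ? "$" : ".", Imm);
  }
}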
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index a3ec41aa348d6..9763aeceef941 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -39,9 +39,9 @@ public:
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
- void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx,
+ bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
raw_ostream &OS);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -61,14 +61,19 @@ public:
void printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS34ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printImmZeroOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ raw_ostream &O);
void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm34PCRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImm34(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index dc2c216a3efd4..593dc2843c3d3 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -12,6 +12,7 @@
#include "PPCMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
+#include <cassert>
using namespace llvm;
@@ -50,15 +51,15 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
-
- UseIntegratedAssembler = true;
}
void PPCXCOFFMCAsmInfo::anchor() {}
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
- assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+ if (T.getArch() == Triple::ppc64le)
+ report_fatal_error("XCOFF is not supported for little-endian targets");
CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
- ZeroDirective = "\t.space\t";
- SymbolsHaveSMC = true;
+
+ // A size of 8 is only supported by the assembler under 64-bit.
+ Data64bitsDirective = Is64Bit ? "\t.vbyte\t8, " : nullptr;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 8c52bbbd8a56a..27c687686641f 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -28,7 +28,7 @@ public:
};
class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
- virtual void anchor();
+ void anchor() override;
public:
explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 676efc5004553..fb65e7320f2b0 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_br24));
+ ((MI.getOpcode() == PPC::BL8_NOTOC)
+ ? (MCFixupKind)PPC::fixup_ppc_br24_notoc
+ : (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
}
@@ -102,6 +104,20 @@ unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+uint64_t
+PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm())
+ return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the immediate field.
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_pcrel34));
+ return 0;
+}
+
unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -159,6 +175,104 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
+uint64_t
+PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode the PCRelative version of memri34: imm34(r0).
+ // In the PC relative version the register for the address must be zero.
+ // The 34 bit immediate can fall into one of three cases:
+ // 1) It is a relocation to be filled in by the linker represented as:
+ // (MCExpr::SymbolRef)
+ // 2) It is a relocation + SignedOffset represented as:
+ // (MCExpr::Binary(MCExpr::SymbolRef + MCExpr::Constant))
+ // 3) It is a known value at compile time.
+
+ // Make sure that the register is a zero as expected.
+ assert(MI.getOperand(OpNo + 1).isImm() && "Expecting an immediate.");
+ uint64_t RegBits =
+ getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 34;
+ assert(RegBits == 0 && "Operand must be 0.");
+
+ // If this is not a MCExpr then we are in case 3) and we are dealing with
+ // a value known at compile time, not a relocation.
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (!MO.isExpr())
+ return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+
+ // At this point in the function it is known that MO is of type MCExpr.
+ // Therefore we are dealing with either case 1) a symbol ref or
+ // case 2) a symbol ref plus a constant.
+ const MCExpr *Expr = MO.getExpr();
+ switch (Expr->getKind()) {
+ default:
+ llvm_unreachable("Unsupported MCExpr for getMemRI34PCRelEncoding.");
+ case MCExpr::SymbolRef: {
+ // Relocation alone.
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr);
+ (void)SRE;
+ // Currently these are the only valid PCRelative Relocations.
+ assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ // Generate the fixup for the relocation.
+ Fixups.push_back(
+ MCFixup::create(0, Expr,
+ static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34)));
+ // Put zero in the location of the immediate. The linker will fill in the
+ // correct value based on the relocation.
+ return 0;
+ }
+ case MCExpr::Binary: {
+ // Relocation plus some offset.
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ assert(BE->getOpcode() == MCBinaryExpr::Add &&
+ "Binary expression opcode must be an add.");
+
+ const MCExpr *LHS = BE->getLHS();
+ const MCExpr *RHS = BE->getRHS();
+
+ // Need to check in both directions. Reloc+Offset and Offset+Reloc.
+ if (LHS->getKind() != MCExpr::SymbolRef)
+ std::swap(LHS, RHS);
+
+ if (LHS->getKind() != MCExpr::SymbolRef ||
+ RHS->getKind() != MCExpr::Constant)
+ llvm_unreachable("Expecting to have one constant and one relocation.");
+
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(LHS);
+ (void)SRE;
+ assert(isInt<34>(cast<MCConstantExpr>(RHS)->getValue()) &&
+ "Value must fit in 34 bits.");
+
+ // Currently these are the only valid PCRelative Relocations.
+ assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL ||
+ SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) &&
+ "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL");
+ // Generate the fixup for the relocation.
+ Fixups.push_back(
+ MCFixup::create(0, Expr,
+ static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34)));
+ // Put zero in the location of the immediate. The linker will fill in the
+ // correct value based on the relocation.
+ return 0;
+ }
+ }
+}
+
+uint64_t
+PPCMCCodeEmitter::getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memri34, which has the low 34-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo + 1).isReg() && "Expecting a register.");
+ uint64_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI)
+ << 34;
+ const MCOperand &MO = MI.getOperand(OpNo);
+ return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+}
+
unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -257,7 +371,7 @@ static unsigned getOpIdxForMO(const MCInst &MI, const MCOperand &MO) {
return ~0U; // Silence any warnings about no return.
}
-unsigned PPCMCCodeEmitter::
+uint64_t PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -316,5 +430,11 @@ unsigned PPCMCCodeEmitter::getInstSizeInBytes(const MCInst &MI) const {
return Desc.getSize();
}
+bool PPCMCCodeEmitter::isPrefixedInstruction(const MCInst &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ const PPCInstrInfo *InstrInfo = static_cast<const PPCInstrInfo*>(&MCII);
+ return InstrInfo->isPrefixed(Opcode);
+}
+
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index 1324faa125533..588aa76bd8064 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -50,6 +50,9 @@ public:
unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -59,6 +62,12 @@ public:
unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -80,7 +89,7 @@ public:
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -97,6 +106,9 @@ public:
// Get the number of bytes used to encode the given MCInst.
unsigned getInstSizeInBytes(const MCInst &MI) const;
+ // Is this instruction a prefixed instruction.
+ bool isPrefixedInstruction(const MCInst &MI) const;
+
private:
FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
void
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index fb9dd5d7aa758..abff444491313 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -17,39 +17,44 @@ using namespace llvm;
#define DEBUG_TYPE "ppcmcexpr"
-const PPCMCExpr*
-PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin);
+const PPCMCExpr *PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr);
}
void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
- if (isDarwinSyntax()) {
- switch (Kind) {
- default: llvm_unreachable("Invalid kind!");
- case VK_PPC_LO: OS << "lo16"; break;
- case VK_PPC_HI: OS << "hi16"; break;
- case VK_PPC_HA: OS << "ha16"; break;
- }
-
- OS << '(';
- getSubExpr()->print(OS, MAI);
- OS << ')';
- } else {
- getSubExpr()->print(OS, MAI);
+ getSubExpr()->print(OS, MAI);
- switch (Kind) {
- default: llvm_unreachable("Invalid kind!");
- case VK_PPC_LO: OS << "@l"; break;
- case VK_PPC_HI: OS << "@h"; break;
- case VK_PPC_HA: OS << "@ha"; break;
- case VK_PPC_HIGH: OS << "@high"; break;
- case VK_PPC_HIGHA: OS << "@higha"; break;
- case VK_PPC_HIGHER: OS << "@higher"; break;
- case VK_PPC_HIGHERA: OS << "@highera"; break;
- case VK_PPC_HIGHEST: OS << "@highest"; break;
- case VK_PPC_HIGHESTA: OS << "@highesta"; break;
- }
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ OS << "@l";
+ break;
+ case VK_PPC_HI:
+ OS << "@h";
+ break;
+ case VK_PPC_HA:
+ OS << "@ha";
+ break;
+ case VK_PPC_HIGH:
+ OS << "@high";
+ break;
+ case VK_PPC_HIGHA:
+ OS << "@higha";
+ break;
+ case VK_PPC_HIGHER:
+ OS << "@higher";
+ break;
+ case VK_PPC_HIGHERA:
+ OS << "@highera";
+ break;
+ case VK_PPC_HIGHEST:
+ OS << "@highest";
+ break;
+ case VK_PPC_HIGHESTA:
+ OS << "@highesta";
+ break;
}
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index ad1454566162a..1dbc7eae63c8a 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -33,33 +33,29 @@ public:
private:
const VariantKind Kind;
const MCExpr *Expr;
- bool IsDarwin;
int64_t evaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
- : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
/// @{
static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx);
+ MCContext &Ctx);
- static const PPCMCExpr *createLo(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_LO, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createLo(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_LO, Expr, Ctx);
}
- static const PPCMCExpr *createHi(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_HI, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createHi(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_HI, Expr, Ctx);
}
- static const PPCMCExpr *createHa(const MCExpr *Expr,
- bool IsDarwin, MCContext &Ctx) {
- return create(VK_PPC_HA, Expr, IsDarwin, Ctx);
+ static const PPCMCExpr *createHa(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_PPC_HA, Expr, Ctx);
}
/// @}
@@ -72,10 +68,6 @@ public:
/// getSubExpr - Get the child of this expression.
const MCExpr *getSubExpr() const { return Expr; }
- /// isDarwinSyntax - True if expression is to be printed using Darwin syntax.
- bool isDarwinSyntax() const { return IsDarwin; }
-
-
/// @}
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index cbfb8e2ff2825..3092d56da1c59 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -13,6 +13,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
+#include "PPCELFStreamer.h"
#include "PPCTargetStreamer.h"
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -20,11 +21,14 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -91,12 +95,21 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
+ MCCFIInstruction::cfiDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
+static MCStreamer *createPPCMCStreamer(const Triple &T, MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter,
+ bool RelaxAll) {
+ return createPPCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter));
+}
+
namespace {
class PPCTargetAsmStreamer : public PPCTargetStreamer {
@@ -107,14 +120,17 @@ public:
: PPCTargetStreamer(S), OS(OS) {}
void emitTCEntry(const MCSymbol &S) override {
- const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
- OS << "\t.tc ";
- OS << (MAI->getSymbolsHaveSMC()
- ? cast<MCSymbolXCOFF>(S).getUnqualifiedName()
- : S.getName());
- OS << "[TC],";
- OS << S.getName();
- OS << '\n';
+ if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
+ MCSymbolXCOFF *TCSym =
+ cast<MCSymbolXCOFF>(Streamer.getContext().getOrCreateSymbol(
+ XSym->getSymbolTableName() + "[TC]"));
+ if (TCSym->hasRename())
+ Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName());
+ OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n';
+ return;
+ }
+
+ OS << "\t.tc " << S.getName() << "[TC]," << S.getName() << '\n';
}
void emitMachine(StringRef CPU) override {
@@ -146,8 +162,8 @@ public:
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
- Streamer.EmitValueToAlignment(8);
- Streamer.EmitSymbolValue(&S, 8);
+ Streamer.emitValueToAlignment(8);
+ Streamer.emitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -166,13 +182,9 @@ public:
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
- int64_t Res;
- if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
- report_fatal_error(".localentry expression must be absolute.");
-
- unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
- if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
- report_fatal_error(".localentry expression cannot be encoded.");
+ // encodePPC64LocalEntryOffset will report an error if it cannot
+ // encode LocalOffset.
+ unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset);
unsigned Other = S->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
@@ -201,6 +213,10 @@ public:
for (auto *Sym : UpdateOther)
if (Sym->isVariable())
copyLocalEntry(Sym, Sym->getVariableValue());
+
+  // Clear the set of symbols that need to be updated so the streamer can
+  // be reused without issues.
+ UpdateOther.clear();
}
private:
@@ -217,6 +233,31 @@ private:
D->setOther(Other);
return true;
}
+
+ unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ int64_t Offset;
+ if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
+ MCA.getContext().reportFatalError(
+ LocalOffset->getLoc(), ".localentry expression must be absolute.");
+
+ switch (Offset) {
+ default:
+ MCA.getContext().reportFatalError(
+ LocalOffset->getLoc(),
+ ".localentry expression is not a valid power of 2.");
+ case 0:
+ return 0;
+ case 1:
+ return 1 << ELF::STO_PPC64_LOCAL_BIT;
+ case 4:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT;
+ }
+ }
};
class PPCTargetMachOStreamer : public PPCTargetStreamer {
@@ -248,8 +289,8 @@ public:
void emitTCEntry(const MCSymbol &S) override {
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
const unsigned PointerSize = MAI->getCodePointerSize();
- Streamer.EmitValueToAlignment(PointerSize);
- Streamer.EmitSymbolValue(&S, PointerSize);
+ Streamer.emitValueToAlignment(PointerSize);
+ Streamer.emitSymbolValue(&S, PointerSize);
}
void emitMachine(StringRef CPU) override {
@@ -313,6 +354,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() {
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+ // Register the elf streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createPPCMCStreamer);
+
// Register the object target streamer.
TargetRegistry::RegisterObjectTargetStreamer(*T,
createObjectTargetStreamer);
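
For reference, the st_other encoding that the new encodePPC64LocalEntryOffset helper produces can be sketched standalone. This is an illustrative reconstruction, assuming STO_PPC64_LOCAL_BIT == 5 as defined in llvm/BinaryFormat/ELF.h; the function name and structure here are not LLVM's.

#include <cassert>
#include <cstdint>

static constexpr unsigned STO_PPC64_LOCAL_BIT = 5; // from llvm/BinaryFormat/ELF.h

// Valid .localentry offsets are 0, 1, and powers of two from 4 to 64;
// the streamer above reports a fatal error for anything else.
unsigned encodeLocalEntry(int64_t Offset) {
  switch (Offset) {
  case 0:
    return 0;
  case 1:
    return 1 << STO_PPC64_LOCAL_BIT;
  case 4: case 8: case 16: case 32: case 64: {
    unsigned Log2 = 0;
    while ((int64_t(1) << Log2) < Offset)
      ++Log2;                          // Log2(4) == 2 ... Log2(64) == 6
    return Log2 << STO_PPC64_LOCAL_BIT;
  }
  default:
    assert(false && "invalid .localentry offset");
    return 0;
  }
}

int main() {
  // ".localentry foo, 8" stores 3 in st_other bits [7:5], i.e. 0x60.
  assert(encodeLocalEntry(8) == (3u << STO_PPC64_LOCAL_BIT));
  return 0;
}
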
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 49443679bb312..719e005d98135 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -32,9 +32,6 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
deleted file mode 100644
index 672f910ab086e..0000000000000
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/PPCFixupKinds.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-
-using namespace llvm;
-
-namespace {
-class PPCMachObjectWriter : public MCMachObjectTargetWriter {
- bool recordScatteredRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size, uint64_t &FixedValue);
-
- void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
-
-public:
- PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
-
- void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
- const MCAsmLayout &Layout, const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) override {
- if (Writer->is64Bit()) {
- report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
- } else
- RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
- FixedValue);
- }
-};
-}
-
-/// computes the log2 of the size of the relocation,
-/// used for relocation_info::r_length.
-static unsigned getFixupKindLog2Size(unsigned Kind) {
- switch (Kind) {
- default:
- report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
- case FK_PCRel_1:
- case FK_Data_1:
- return 0;
- case FK_PCRel_2:
- case FK_Data_2:
- return 1;
- case FK_PCRel_4:
- case PPC::fixup_ppc_brcond14:
- case PPC::fixup_ppc_half16:
- case PPC::fixup_ppc_br24:
- case FK_Data_4:
- return 2;
- case FK_PCRel_8:
- case FK_Data_8:
- return 3;
- }
- return 0;
-}
-
-/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocType().
-static unsigned getRelocType(const MCValue &Target,
- const MCFixupKind FixupKind, // from
- // Fixup.getKind()
- const bool IsPCRel) {
- const MCSymbolRefExpr::VariantKind Modifier =
- Target.isAbsolute() ? MCSymbolRefExpr::VK_None
- : Target.getSymA()->getKind();
- // determine the type of the relocation
- unsigned Type = MachO::GENERIC_RELOC_VANILLA;
- if (IsPCRel) { // relative to PC
- switch ((unsigned)FixupKind) {
- default:
- report_fatal_error("Unimplemented fixup kind (relative)");
- case PPC::fixup_ppc_br24:
- Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
- break;
- case PPC::fixup_ppc_brcond14:
- Type = MachO::PPC_RELOC_BR14;
- break;
- case PPC::fixup_ppc_half16:
- switch (Modifier) {
- default:
- llvm_unreachable("Unsupported modifier for half16 fixup");
- case MCSymbolRefExpr::VK_PPC_HA:
- Type = MachO::PPC_RELOC_HA16;
- break;
- case MCSymbolRefExpr::VK_PPC_LO:
- Type = MachO::PPC_RELOC_LO16;
- break;
- case MCSymbolRefExpr::VK_PPC_HI:
- Type = MachO::PPC_RELOC_HI16;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)FixupKind) {
- default:
- report_fatal_error("Unimplemented fixup kind (absolute)!");
- case PPC::fixup_ppc_half16:
- switch (Modifier) {
- default:
- llvm_unreachable("Unsupported modifier for half16 fixup");
- case MCSymbolRefExpr::VK_PPC_HA:
- Type = MachO::PPC_RELOC_HA16_SECTDIFF;
- break;
- case MCSymbolRefExpr::VK_PPC_LO:
- Type = MachO::PPC_RELOC_LO16_SECTDIFF;
- break;
- case MCSymbolRefExpr::VK_PPC_HI:
- Type = MachO::PPC_RELOC_HI16_SECTDIFF;
- break;
- }
- break;
- case FK_Data_4:
- break;
- case FK_Data_2:
- break;
- }
- }
- return Type;
-}
-
-static void makeRelocationInfo(MachO::any_relocation_info &MRE,
- const uint32_t FixupOffset, const uint32_t Index,
- const unsigned IsPCRel, const unsigned Log2Size,
- const unsigned IsExtern, const unsigned Type) {
- MRE.r_word0 = FixupOffset;
- // The bitfield offsets that work (as determined by trial-and-error)
- // are different than what is documented in the mach-o manuals.
- // This appears to be an endianness issue; reversing the order of the
- // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but
- // breaks x86/ARM assembly).
- MRE.r_word1 = ((Index << 8) | // was << 0
- (IsPCRel << 7) | // was << 24
- (Log2Size << 5) | // was << 25
- (IsExtern << 4) | // was << 27
- (Type << 0)); // was << 28
-}
-
-static void
-makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
- const uint32_t Addr, const unsigned Type,
- const unsigned Log2Size, const unsigned IsPCRel,
- const uint32_t Value2) {
- // For notes on bitfield positions and endianness, see:
- // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
- MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
- (IsPCRel << 30) | MachO::R_SCATTERED);
- MRE.r_word1 = Value2;
-}
-
-/// Compute fixup offset (address).
-static uint32_t getFixupOffset(const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
- // On Mach-O, ppc_fixup_half16 relocations must refer to the
- // start of the instruction, not the second halfword, as ELF does
- if (Fixup.getTargetKind() == PPC::fixup_ppc_half16)
- FixupOffset &= ~uint32_t(3);
- return FixupOffset;
-}
-
-/// \return false if falling back to using non-scattered relocation,
-/// otherwise true for normal scattered relocation.
-/// based on X86MachObjectWriter::recordScatteredRelocation
-/// and ARMMachObjectWriter::recordScatteredRelocation
-bool PPCMachObjectWriter::recordScatteredRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size, uint64_t &FixedValue) {
- // caller already computes these, can we just pass and reuse?
- const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
- const MCFixupKind FK = Fixup.getKind();
- const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
- const unsigned Type = getRelocType(Target, FK, IsPCRel);
-
- // Is this a local or SECTDIFF relocation entry?
- // SECTDIFF relocation entries have symbol subtractions,
- // and require two entries, the first for the add-symbol value,
- // the second for the subtract-symbol value.
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
-
- if (!A->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = Writer->getSymbolAddress(*A, Layout);
- uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
- FixedValue += SecAddr;
- uint32_t Value2 = 0;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- const MCSymbol *SB = &B->getSymbol();
-
- if (!SB->getFragment())
- report_fatal_error("symbol '" + SB->getName() +
- "' can not be undefined in a subtraction expression");
-
- // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
- Value2 = Writer->getSymbolAddress(*SB, Layout);
- FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
- }
- // FIXME: does FixedValue get used??
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == MachO::PPC_RELOC_SECTDIFF ||
- Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
- Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
- Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
- Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
- Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
- // X86 had this piece, but ARM does not
- // If the offset is too large to fit in a scattered relocation,
- // we're hosed. It's an unfortunate limitation of the MachO format.
- if (FixupOffset > 0xffffff) {
- char Buffer[32];
- format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportError(Fixup.getLoc(),
- Twine("Section too large, can't encode "
- "r_address (") +
- Buffer + ") into 24 bits of scattered "
- "relocation entry.");
- return false;
- }
-
- // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
- // see PPCMCExpr::evaluateAsRelocatableImpl()
- uint32_t other_half = 0;
- switch (Type) {
- case MachO::PPC_RELOC_LO16_SECTDIFF:
- other_half = (FixedValue >> 16) & 0xffff;
- // applyFixupOffset longer extracts the high part because it now assumes
- // this was already done.
- // It looks like this is not true for the FixedValue needed with Mach-O
- // relocs.
- // So we need to adjust FixedValue again here.
- FixedValue &= 0xffff;
- break;
- case MachO::PPC_RELOC_HA16_SECTDIFF:
- other_half = FixedValue & 0xffff;
- FixedValue =
- ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
- break;
- case MachO::PPC_RELOC_HI16_SECTDIFF:
- other_half = FixedValue & 0xffff;
- FixedValue = (FixedValue >> 16) & 0xffff;
- break;
- default:
- llvm_unreachable("Invalid PPC scattered relocation type.");
- break;
- }
-
- MachO::any_relocation_info MRE;
- makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
- Log2Size, IsPCRel, Value2);
- Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
- } else {
- // If the offset is more than 24-bits, it won't fit in a scattered
- // relocation offset field, so we fall back to using a non-scattered
- // relocation. This is a bit risky, as if the offset reaches out of
- // the block and the linker is doing scattered loading on this
- // symbol, things can go badly.
- //
- // Required for 'as' compatibility.
- if (FixupOffset > 0xffffff)
- return false;
- }
- MachO::any_relocation_info MRE;
- makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
- Writer->addRelocation(nullptr, Fragment->getParent(), MRE);
- return true;
-}
-
-// see PPCELFObjectWriter for a general outline of cases
-void PPCMachObjectWriter::RecordPPCRelocation(
- MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- const MCFixupKind FK = Fixup.getKind(); // unsigned
- const unsigned Log2Size = getFixupKindLog2Size(FK);
- const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
- const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
-
- // If this is a difference or a defined symbol plus an offset, then we need a
- // scattered relocation entry. Differences always require scattered
- // relocations.
- if (Target.getSymB() &&
- // Q: are branch targets ever scattered?
- RelocType != MachO::PPC_RELOC_BR24 &&
- RelocType != MachO::PPC_RELOC_BR14) {
- recordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
- Log2Size, FixedValue);
- return;
- }
-
- // this doesn't seem right for RIT_PPC_BR24
- // Get the symbol data, if any.
- const MCSymbol *A = nullptr;
- if (Target.getSymA())
- A = &Target.getSymA()->getSymbol();
-
- // See <reloc.h>.
- const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
- unsigned Index = 0;
- unsigned Type = RelocType;
-
- const MCSymbol *RelSymbol = nullptr;
- if (Target.isAbsolute()) { // constant
- // SymbolNum of 0 indicates the absolute section.
- //
- // FIXME: Currently, these are never generated (see code below). I cannot
- // find a case where they are actually emitted.
- report_fatal_error("FIXME: relocations to absolute targets "
- "not yet implemented");
- // the above line stolen from ARM, not sure
- } else {
- // Resolve constant variables.
- if (A->isVariable()) {
- int64_t Res;
- if (A->getVariableValue()->evaluateAsAbsolute(
- Res, Layout, Writer->getSectionAddressMap())) {
- FixedValue = Res;
- return;
- }
- }
-
- // Check whether we need an external or internal relocation.
- if (Writer->doesSymbolRequireExternRelocation(*A)) {
- RelSymbol = A;
- // For external relocations, make sure to offset the fixup value to
- // compensate for the addend of the symbol address, if it was
- // undefined. This occurs with weak definitions, for example.
- if (!A->isUndefined())
- FixedValue -= Layout.getSymbolOffset(*A);
- } else {
- // The index is the section ordinal (1-based).
- const MCSection &Sec = A->getSection();
- Index = Sec.getOrdinal() + 1;
- FixedValue += Writer->getSectionAddress(&Sec);
- }
- if (IsPCRel)
- FixedValue -= Writer->getSectionAddress(Fragment->getParent());
- }
-
- // struct relocation_info (8 bytes)
- MachO::any_relocation_info MRE;
- makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type);
- Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
-}
-
-std::unique_ptr<MCObjectTargetWriter>
-llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype) {
- return std::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype);
-}
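
The HA16/LO16 adjustment in the file deleted above is ordinary PowerPC half-word addressing math rather than anything Mach-O specific. A minimal standalone illustration of the round-up rule (generic arithmetic, not an LLVM API):

#include <cassert>
#include <cstdint>

// Low half of a 32-bit address, as consumed by an @lo fixup.
uint16_t lo(uint32_t Addr) { return Addr & 0xffff; }

// High-adjusted half (@ha): rounds up when the low half is negative as a
// signed 16-bit value, so that (ha << 16) + (int16_t)lo == Addr.
uint16_t ha(uint32_t Addr) {
  return ((Addr >> 16) + ((Addr & 0x8000) ? 1 : 0)) & 0xffff;
}

int main() {
  uint32_t Addr = 0x1234ABCD;   // low half has bit 15 set
  int32_t Rebuilt = (int32_t(ha(Addr)) << 16) + int16_t(lo(Addr));
  assert(uint32_t(Rebuilt) == Addr);   // the addis/addi pair round-trips
  return 0;
}
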
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 7fdbb8990b553..d672d54772e0a 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -7,16 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCXCOFFObjectWriter.h"
using namespace llvm;
namespace {
class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+ static constexpr uint8_t SignBitMask = 0x80;
public:
PPCXCOFFObjectWriter(bool Is64Bit);
+
+ std::pair<uint8_t, uint8_t>
+ getRelocTypeAndSignSize(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
};
} // end anonymous namespace
@@ -27,3 +37,40 @@ std::unique_ptr<MCObjectTargetWriter>
llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
return std::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
}
+
+std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
+ const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+  // People from the AIX OS team say the AIX link editor does not care
+  // about the sign bit in the relocation entry "most" of the time.
+  // The system assembler seems to set the sign bit on the relocation entry
+  // based on a similar property of IsPCRel, so we do the same here.
+  // TODO: Investigate further how the assembler decides to set the sign
+  // bit; we might want to match that.
+ const uint8_t EncodedSignednessIndicator = IsPCRel ? SignBitMask : 0u;
+
+  // The magic number we use in SignAndSize has a strong relationship with
+  // the corresponding MCFixupKind: SignAndSize encodes the number of bits
+  // being relocated minus 1, which in most cases is one less than the bit
+  // width in the fixup kind's name.
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ report_fatal_error("Unimplemented fixup kind.");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ report_fatal_error("Unsupported modifier for half16 fixup.");
+ case MCSymbolRefExpr::VK_None:
+ return {XCOFF::RelocationType::R_TOC, EncodedSignednessIndicator | 15};
+ }
+ break;
+ case PPC::fixup_ppc_br24:
+    // Branches are 4-byte aligned, so the 24 bits we encode in
+    // the instruction actually represent a 26-bit offset.
+ return {XCOFF::RelocationType::R_RBR, EncodedSignednessIndicator | 25};
+ case FK_Data_4:
+ return {XCOFF::RelocationType::R_POS, EncodedSignednessIndicator | 31};
+ }
+}
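
A minimal sketch of the SignAndSize byte returned above, using only the constants visible in this diff; the helper name is made up for illustration:

#include <cassert>
#include <cstdint>

static constexpr uint8_t SignBitMask = 0x80;

// Top bit flags a signed (here: PC-relative) relocation; the low bits
// hold the number of bits being relocated minus 1.
uint8_t signAndSize(bool IsPCRel, unsigned BitLength) {
  return (IsPCRel ? SignBitMask : 0u) | uint8_t(BitLength - 1);
}

int main() {
  // fixup_ppc_br24: 24 instruction bits, word-aligned, i.e. a 26-bit offset.
  assert(signAndSize(/*IsPCRel=*/true, 26) == (0x80 | 25));
  // FK_Data_4: a full 32-bit word, not PC-relative.
  assert(signAndSize(/*IsPCRel=*/false, 32) == 31);
  return 0;
}
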
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 9b3d13989ee29..d7e3519d5539e 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
+ VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index a83509f0e6870..7e0aa2c6061dc 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -51,10 +51,9 @@ namespace llvm {
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool IsDarwin);
+ AsmPrinter &AP);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &OutMO, AsmPrinter &AP,
- bool IsDarwin);
+ MCOperand &OutMO, AsmPrinter &AP);
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
@@ -99,33 +98,33 @@ namespace llvm {
/// the function's picbase, e.g. lo16(symbol-picbase).
MO_PIC_FLAG = 2,
- /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
- /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 4,
+ /// MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to
+    /// the current instruction address (pc), e.g., var@pcrel. Fixup is VK_PCREL.
+ MO_PCREL_FLAG = 4,
- /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
- /// symbol with hidden visibility. This causes a different kind of
- /// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 8,
+    /// MO_GOT_FLAG - If this bit is set, the symbol reference is to be computed
+    /// via the GOT. For example, when combined with MO_PCREL_FLAG it should
+    /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
+ MO_GOT_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 0xf0,
+ MO_ACCESS_MASK = 0xf00,
/// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
- MO_LO = 1 << 4,
- MO_HA = 2 << 4,
+ MO_LO = 1 << 8,
+ MO_HA = 2 << 8,
- MO_TPREL_LO = 4 << 4,
- MO_TPREL_HA = 3 << 4,
+ MO_TPREL_LO = 4 << 8,
+ MO_TPREL_HA = 3 << 8,
/// These values identify relocations on immediates folded
/// into memory operations.
- MO_DTPREL_LO = 5 << 4,
- MO_TLSLD_LO = 6 << 4,
- MO_TOC_LO = 7 << 4,
+ MO_DTPREL_LO = 5 << 8,
+ MO_TLSLD_LO = 6 << 8,
+ MO_TOC_LO = 7 << 8,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4
+ MO_TLS = 8 << 8
};
} // end namespace PPCII
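
The net effect of the renumbering above is that the low byte of the operand flags now holds independent bit flags while the access kind occupies bits 11:8, so the two can no longer collide. A small standalone check, with values copied from the enum and a combination chosen purely for illustration:

#include <cassert>

enum {
  MO_PIC_FLAG = 2,
  MO_PCREL_FLAG = 4,
  MO_GOT_FLAG = 8,
  MO_ACCESS_MASK = 0xf00,
  MO_LO = 1 << 8,
  MO_HA = 2 << 8,
};

int main() {
  // Hypothetical combination: a GOT-relative, PC-relative reference
  // carrying a high-adjusted access kind.
  unsigned Flags = MO_PCREL_FLAG | MO_GOT_FLAG | MO_HA;
  assert((Flags & MO_ACCESS_MASK) == MO_HA); // access kind decodes cleanly
  assert(Flags & MO_PCREL_FLAG);             // bit flags stay untouched
  assert(Flags & MO_GOT_FLAG);
  return 0;
}
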
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index bef0a81ee3ad8..9ad78bf67fe6c 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -51,6 +51,7 @@ def DirectivePwr6x
def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
+def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">;
def DirectivePwrFuture
: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
@@ -166,6 +167,16 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports instruction fusion">;
+def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
+ "HasAddiLoadFusion", "true",
+ "Power8 Addi-Load fusion",
+ [FeatureFusion]>;
+def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
+ "HasAddisLoadFusion", "true",
+ "Power8 Addis-Load fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -194,7 +205,11 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0",
"true",
- "Enable instructions added in ISA 3.0.">;
+ "Enable instructions in ISA 3.0.">;
+def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
+ "true",
+ "Enable instructions in ISA 3.1.",
+ [FeatureISA3_0]>;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
"Enable POWER9 Altivec instructions",
[FeatureISA3_0, FeatureP8Altivec]>;
@@ -202,6 +217,10 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
"Enable POWER9 vector instructions",
[FeatureISA3_0, FeatureP8Vector,
FeatureP9Altivec]>;
+def FeatureP10Vector : SubtargetFeature<"power10-vector", "HasP10Vector",
+ "true",
+ "Enable POWER10 vector instructions",
+ [FeatureISA3_1, FeatureP9Vector]>;
// A separate feature for this even though it is equivalent to P9Vector
// because this is a feature of the implementation rather than the architecture
// and may go away with future CPU's.
@@ -209,6 +228,21 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
"VectorsUseTwoUnits",
"true",
"Vectors use two units">;
+def FeaturePrefixInstrs : SubtargetFeature<"prefix-instrs", "HasPrefixInstrs",
+ "true",
+ "Enable prefixed instructions",
+ [FeatureISA3_0, FeatureP8Vector,
+ FeatureP9Altivec]>;
+def FeaturePCRelativeMemops :
+ SubtargetFeature<"pcrelative-memops", "HasPCRelativeMemops", "true",
+ "Enable PC relative Memory Ops",
+ [FeatureISA3_0]>;
+
+def FeaturePredictableSelectIsExpensive :
+ SubtargetFeature<"predictable-select-expensive",
+ "PredictableSelectIsExpensive",
+ "true",
+ "Prefer likely predicted branches over selects">;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
@@ -225,7 +259,7 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
// !listconcat(FutureProcessorInheritableFeatures,
// FutureProcessorSpecificFeatures)
-// Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
+// Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
// well as providing a single point of definition if the feature set will be
// used elsewhere.
def ProcessorFeatures {
@@ -262,25 +296,34 @@ def ProcessorFeatures {
!listconcat(P7InheritableFeatures, P7SpecificFeatures);
// Power8
- list<SubtargetFeature> P8AdditionalFeatures = [DirectivePwr8,
- FeatureP8Altivec,
- FeatureP8Vector,
- FeatureP8Crypto,
- FeatureHTM,
- FeatureDirectMove,
- FeatureICBT,
- FeaturePartwordAtomic];
- list<SubtargetFeature> P8SpecificFeatures = [];
+ list<SubtargetFeature> P8AdditionalFeatures =
+ [DirectivePwr8,
+ FeatureP8Altivec,
+ FeatureP8Vector,
+ FeatureP8Crypto,
+ FeatureHTM,
+ FeatureDirectMove,
+ FeatureICBT,
+ FeaturePartwordAtomic,
+ FeaturePredictableSelectIsExpensive
+ ];
+
+ list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
+ FeatureAddisLoadFusion];
list<SubtargetFeature> P8InheritableFeatures =
!listconcat(P7InheritableFeatures, P8AdditionalFeatures);
list<SubtargetFeature> P8Features =
!listconcat(P8InheritableFeatures, P8SpecificFeatures);
// Power9
- list<SubtargetFeature> P9AdditionalFeatures = [DirectivePwr9,
- FeatureP9Altivec,
- FeatureP9Vector,
- FeatureISA3_0];
+ list<SubtargetFeature> P9AdditionalFeatures =
+ [DirectivePwr9,
+ FeatureP9Altivec,
+ FeatureP9Vector,
+ FeatureISA3_0,
+ FeaturePredictableSelectIsExpensive
+ ];
+
// Some features are unique to Power9 and there is no reason to assume
// they will be part of any future CPUs. One example is the narrower
// dispatch for vector operations than scalar ones. For the time being,
@@ -294,13 +337,25 @@ def ProcessorFeatures {
list<SubtargetFeature> P9Features =
!listconcat(P9InheritableFeatures, P9SpecificFeatures);
+ // Power10
+  // For the P10 CPU we assume that all of the existing features from Power9
+ // still exist with the exception of those we know are Power9 specific.
+ list<SubtargetFeature> P10AdditionalFeatures =
+ [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
+ FeaturePCRelativeMemops, FeatureP10Vector];
+ list<SubtargetFeature> P10SpecificFeatures = [];
+ list<SubtargetFeature> P10InheritableFeatures =
+ !listconcat(P9InheritableFeatures, P10AdditionalFeatures);
+ list<SubtargetFeature> P10Features =
+ !listconcat(P10InheritableFeatures, P10SpecificFeatures);
+
// Future
- // For future CPU we assume that all of the existing features from Power 9
- // still exist with the exception of those we know are Power 9 specific.
+  // For a future CPU we assume that all of the existing features from Power10
+ // still exist with the exception of those we know are Power10 specific.
list<SubtargetFeature> FutureAdditionalFeatures = [];
list<SubtargetFeature> FutureSpecificFeatures = [];
list<SubtargetFeature> FutureInheritableFeatures =
- !listconcat(P9InheritableFeatures, FutureAdditionalFeatures);
+ !listconcat(P10InheritableFeatures, FutureAdditionalFeatures);
list<SubtargetFeature> FutureFeatures =
!listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
}
@@ -442,7 +497,7 @@ def : ProcessorModel<"g5", G5Model,
def : ProcessorModel<"e500", PPCE500Model,
[DirectiveE500,
FeatureICBT, FeatureBookE,
- FeatureISEL, FeatureMFTB, FeatureSPE]>;
+ FeatureISEL, FeatureMFTB, FeatureMSYNC, FeatureSPE]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
@@ -505,6 +560,8 @@ def : ProcessorModel<"pwr6x", G5Model,
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
+// No scheduler model yet.
+def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
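
The Inheritable/Specific split used by ProcessorFeatures can be mirrored in plain C++ to show why "future" concatenates P10InheritableFeatures rather than P10Features. Feature names below are copied from this diff; the concat helper stands in for TableGen's !listconcat and is illustrative only:

#include <string>
#include <vector>

using FeatureList = std::vector<std::string>;

// Plain-C++ stand-in for TableGen's !listconcat.
static FeatureList concat(FeatureList A, const FeatureList &B) {
  A.insert(A.end(), B.begin(), B.end());
  return A;
}

int main() {
  FeatureList P9Inheritable  = {"isa-v30-instructions", "power9-vector"};
  FeatureList P10Additional  = {"isa-v31-instructions", "prefix-instrs",
                                "pcrelative-memops", "power10-vector"};
  FeatureList P10Specific    = {};  // empty today, may grow later
  FeatureList P10Inheritable = concat(P9Inheritable, P10Additional);
  FeatureList P10Features    = concat(P10Inheritable, P10Specific);
  // "future" builds on P10Inheritable, so anything later added to
  // P10Specific is automatically excluded from FutureFeatures.
  FeatureList FutureFeatures = concat(P10Inheritable, /*FutureAdditional=*/{});
  return P10Features.size() >= FutureFeatures.size() ? 0 : 1;
}
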
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4311df5dbeb8a..bf5fe741bac8a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -83,8 +83,6 @@ protected:
const PPCSubtarget *Subtarget = nullptr;
StackMaps SM;
- virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO);
-
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
@@ -100,7 +98,7 @@ public:
return AsmPrinter::doInitialization(M);
}
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitInstruction(const MachineInstr *MI) override;
/// This function is for PrintAsmOperand and PrintAsmMemoryOperand,
/// invoked by EmitMSInlineAsmStr and EmitGCCInlineAsmStr only.
@@ -113,7 +111,7 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &M) override;
void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
@@ -137,37 +135,41 @@ public:
return "Linux PPC Assembly Printer";
}
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitEndOfAsmFile(Module &) override;
- void EmitFunctionEntryLabel() override;
+ void emitFunctionEntryLabel() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
+ void emitInstruction(const MachineInstr *MI) override;
};
class PPCAIXAsmPrinter : public PPCAsmPrinter {
private:
static void ValidateGV(const GlobalVariable *GV);
-protected:
- MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override;
public:
PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ : PPCAsmPrinter(TM, std::move(Streamer)) {
+ if (MAI->isLittleEndian())
+ report_fatal_error(
+ "cannot create AIX PPC Assembly Printer for a little-endian target");
+ }
StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+ bool doInitialization(Module &M) override;
+
void SetupMachineFunction(MachineFunction &MF) override;
- const MCExpr *lowerConstant(const Constant *CV) override;
+ void emitGlobalVariable(const GlobalVariable *GV) override;
- void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void emitFunctionDescriptor() override;
- void EmitFunctionDescriptor() override;
+ void emitEndOfAsmFile(Module &) override;
- void EmitEndOfAsmFile(Module &) override;
+ void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
};
} // end anonymous namespace
@@ -176,23 +178,7 @@ void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
raw_ostream &O) {
// Computing the address of a global symbol, not calling it.
const GlobalValue *GV = MO.getGlobal();
- MCSymbol *SymToPrint;
-
- // External or weakly linked global variables need non-lazily-resolved stubs
- if (Subtarget->hasLazyResolverStub(GV)) {
- SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
- SymToPrint);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
- !GV->hasInternalLinkage());
- } else {
- SymToPrint = getSymbol(GV);
- }
-
- SymToPrint->print(O, MAI);
-
+ getSymbol(GV)->print(O, MAI);
printOffset(MO.getOffset(), O);
}
@@ -208,9 +194,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
- if (!Subtarget->isDarwin())
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
- O << RegName;
+ O << PPCRegisterInfo::stripRegisterPrefix(RegName);
return;
}
case MachineOperand::MO_Immediate:
@@ -298,18 +282,17 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
+ case 'L': // A memory reference to the upper word of a double word op.
+ O << getDataLayout().getPointerSize() << "(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
case 'y': // A memory reference for an X-form instruction
- {
- const char *RegName = "r0";
- if (!Subtarget->isDarwin())
- RegName = PPCRegisterInfo::stripRegisterPrefix(RegName);
- O << RegName << ", ";
- printOperand(MI, OpNo, O);
- return false;
- }
+ O << "0, ";
+ printOperand(MI, OpNo, O);
+ return false;
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
- {
// FIXME: Currently for PowerPC memory operands are always loaded
// into a register, so we never get an update or indexed form.
// This is bad even for offset forms, since even if we know we
@@ -319,7 +302,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
assert(MI->getOperand(OpNo).isReg());
return false;
}
- }
}
assert(MI->getOperand(OpNo).isReg());
@@ -339,7 +321,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) {
return TOCEntry;
}
-void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void PPCAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
@@ -348,7 +330,7 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
@@ -377,7 +359,7 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -516,16 +498,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
/// Map a machine operand for a TOC pseudo-machine instruction to its
/// corresponding MCSymbol.
-MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
+ AsmPrinter &AP) {
switch (MO.getType()) {
case MachineOperand::MO_GlobalAddress:
- return getSymbol(MO.getGlobal());
+ return AP.getSymbol(MO.getGlobal());
case MachineOperand::MO_ConstantPoolIndex:
- return GetCPISymbol(MO.getIndex());
+ return AP.GetCPISymbol(MO.getIndex());
case MachineOperand::MO_JumpTableIndex:
- return GetJTISymbol(MO.getIndex());
+ return AP.GetJTISymbol(MO.getIndex());
case MachineOperand::MO_BlockAddress:
- return GetBlockAddressSymbol(MO.getBlockAddress());
+ return AP.GetBlockAddressSymbol(MO.getBlockAddress());
default:
llvm_unreachable("Unexpected operand type to get symbol.");
}
@@ -534,9 +517,8 @@ MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
-void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
- const bool IsDarwin = TM.getTargetTriple().isOSDarwin();
const bool IsPPC64 = Subtarget->isPPC64();
const bool IsAIX = Subtarget->isAIXABI();
const Module *M = MF->getFunction().getParent();
@@ -614,7 +596,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
// Emit the label.
- OutStreamer->EmitLabel(PICBase);
+ OutStreamer->emitLabel(PICBase);
return;
}
case PPC::UpdateGBR: {
@@ -625,7 +607,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
// addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
@@ -637,19 +619,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
- const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+ const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, OutContext);
EmitToStreamer(
*OutStreamer,
MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
- const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+ const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, OutContext);
EmitToStreamer(
*OutStreamer,
MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
return;
} else {
MCSymbol *PICOffset =
- MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol(*MF);
TmpInst.setOpcode(PPC::LWZ);
const MCExpr *Exp =
MCSymbolRefExpr::create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
@@ -676,10 +658,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
case PPC::LWZtoc: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct.");
-
// Transform %rN = LWZtoc @op1, %r2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LWZ.
TmpInst.setOpcode(PPC::LWZ);
@@ -689,7 +669,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtoc.");
// Map the operand to its corresponding MCSymbol.
- const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Create a reference to the GOT entry for the symbol. The GOT entry will be
// synthesized later.
@@ -734,10 +714,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDtocCPT:
case PPC::LDtocBA:
case PPC::LDtoc: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %x3 = LDtoc @min1, %x2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD.
TmpInst.setOpcode(PPC::LD);
@@ -750,7 +728,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// global address operand to be a reference to the TOC entry we will
// synthesize later.
MCSymbol *TOCEntry =
- lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO));
+ lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this));
const MCSymbolRefExpr::VariantKind VK =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
@@ -766,7 +744,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
" AIX.");
// Transform %rd = ADDIStocHA %rA, @sym(%r2)
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDIS.
TmpInst.setOpcode(PPC::ADDIS);
@@ -776,7 +754,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -796,7 +774,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
" AIX.");
// Transform %rd = LWZtocL @sym, %rs.
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to lwz.
TmpInst.setOpcode(PPC::LWZ);
@@ -806,7 +784,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for LWZtocL.");
// Map the machine operand to its corresponding MCSymbol.
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
// Always use TOC on AIX. Map the global address operand to be a reference
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
@@ -821,10 +799,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::ADDIStocHA8: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %xd = ADDIStocHA8 %x2, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDIS8. If the global address is the address of
// an external symbol, is a jump table address, is a block address, or is a
@@ -836,7 +812,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA8!");
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
const bool GlobalToc =
MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
@@ -861,10 +837,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::LDtocL: {
- assert(!IsDarwin && "TOC is an ELF/XCOFF construct");
-
// Transform %xd = LDtocL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD. If the global address is the address of
// an external symbol, is a jump table address, is a block address, or is
@@ -882,7 +856,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"LDtocL used on symbol that could be accessed directly is "
"invalid. Must match ADDIStocHA8."));
- const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO);
+ const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -897,7 +871,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItocL: {
// Transform %xd = ADDItocL %xs, @sym
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise, reference the
@@ -912,7 +886,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Interposable definitions must use indirect access."));
const MCExpr *Exp =
- MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO),
+ MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this),
MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext);
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
@@ -937,7 +911,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %xd = LDgotTprelL @sym, %xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to LD.
TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ);
@@ -966,9 +940,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCBinaryExpr::createSub(MCSymbolRefExpr::create(GOTSymbol, OutContext),
MCSymbolRefExpr::create(GOTRef, OutContext),
OutContext);
- OutStreamer->EmitLabel(GOTRef);
- OutStreamer->EmitValue(OffsExpr, 4);
- OutStreamer->EmitLabel(NextInstr);
+ OutStreamer->emitLabel(GOTRef);
+ OutStreamer->emitValue(OffsExpr, 4);
+ OutStreamer->emitLabel(NextInstr);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR)
.addReg(MI->getOperand(0).getReg()));
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LWZ)
@@ -1167,10 +1141,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// suite shows a handful of test cases that fail this check for
// Darwin. Those need to be investigated before this sanity test
// can be enabled for those subtargets.
- if (!IsDarwin) {
- unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
- const MachineOperand &MO = MI->getOperand(OpNum);
- if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal()) {
+ const DataLayout &DL = MO.getGlobal()->getParent()->getDataLayout();
+ if (MO.getGlobal()->getPointerAlignment(DL) < 4)
llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
}
// Now process the instruction normally.
@@ -1178,17 +1153,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin);
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
}
-void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (!Subtarget->isPPC64())
- return PPCAsmPrinter::EmitInstruction(MI);
+ return PPCAsmPrinter::emitInstruction(MI);
switch (MI->getOpcode()) {
default:
- return PPCAsmPrinter::EmitInstruction(MI);
+ return PPCAsmPrinter::emitInstruction(MI);
case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
// .begin:
// b .end # lis 0, FuncId[16..32]
@@ -1203,7 +1178,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// of instructions change.
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
MCSymbol *EndOfSled = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(BeginOfSled);
+ OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer,
MCInstBuilder(PPC::B).addExpr(
MCSymbolRefExpr::create(EndOfSled, OutContext)));
@@ -1218,8 +1193,8 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
OutContext)));
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
- OutStreamer->EmitLabel(EndOfSled);
- recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
+ OutStreamer->emitLabel(EndOfSled);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER, 2);
break;
}
case TargetOpcode::PATCHABLE_RET: {
@@ -1229,7 +1204,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
for (const auto &MO :
make_range(std::next(MI->operands_begin()), MI->operands_end())) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MO, MCOp, *this, false))
+ if (LowerPPCMachineOperandToMCOperand(MO, MCOp, *this))
RetInst.addOperand(MCOp);
}
@@ -1289,9 +1264,9 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//
// Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
// of instructions change.
- OutStreamer->EmitCodeAlignment(8);
+ OutStreamer->emitCodeAlignment(8);
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(BeginOfSled);
+ OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer, RetInst);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
EmitToStreamer(
@@ -1306,8 +1281,8 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
EmitToStreamer(*OutStreamer, RetInst);
if (IsConditional)
- OutStreamer->EmitLabel(FallthroughLabel);
- recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
+ OutStreamer->emitLabel(FallthroughLabel);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT, 2);
break;
}
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
@@ -1320,7 +1295,7 @@ void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
-void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
@@ -1331,10 +1306,10 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isPPC64() ||
!isPositionIndependent())
- return AsmPrinter::EmitStartOfAsmFile(M);
+ return AsmPrinter::emitStartOfAsmFile(M);
if (M.getPICLevel() == PICLevel::SmallPIC)
- return AsmPrinter::EmitStartOfAsmFile(M);
+ return AsmPrinter::emitStartOfAsmFile(M);
OutStreamer->SwitchSection(OutContext.getELFSection(
".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
@@ -1342,7 +1317,7 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
MCSymbol *CurrentPos = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(CurrentPos);
+ OutStreamer->emitLabel(CurrentPos);
// The GOT pointer points to the middle of the GOT, in order to reference the
// entire 64kB range. 0x8000 is the midpoint.
@@ -1351,24 +1326,24 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
MCConstantExpr::create(0x8000, OutContext),
OutContext);
- OutStreamer->EmitAssignment(TOCSym, tocExpr);
+ OutStreamer->emitAssignment(TOCSym, tocExpr);
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
}
-void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+void PPCLinuxAsmPrinter::emitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
if (!Subtarget->isPPC64() &&
(!isPositionIndependent() ||
MF->getFunction().getParent()->getPICLevel() == PICLevel::SmallPIC))
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
if (PPCFI->usesPICBase() && !Subtarget->isSecurePlt()) {
- MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(*MF);
MCSymbol *PICBase = MF->getPICBaseSymbol();
- OutStreamer->EmitLabel(RelocSymbol);
+ OutStreamer->emitLabel(RelocSymbol);
const MCExpr *OffsExpr =
MCBinaryExpr::createSub(
@@ -1376,11 +1351,11 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutContext),
MCSymbolRefExpr::create(PICBase, OutContext),
OutContext);
- OutStreamer->EmitValue(OffsExpr, 4);
- OutStreamer->EmitLabel(CurrentFnSym);
+ OutStreamer->emitValue(OffsExpr, 4);
+ OutStreamer->emitLabel(CurrentFnSym);
return;
} else
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
}
// ELFv2 ABI - Normal entry label.
@@ -1394,17 +1369,17 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
- MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol();
+ MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol(*MF);
const MCExpr *TOCDeltaExpr =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
MCSymbolRefExpr::create(GlobalEPSymbol,
OutContext),
OutContext);
- OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol());
- OutStreamer->EmitValue(TOCDeltaExpr, 8);
+ OutStreamer->emitLabel(PPCFI->getTOCOffsetSymbol(*MF));
+ OutStreamer->emitValue(TOCDeltaExpr, 8);
}
- return AsmPrinter::EmitFunctionEntryLabel();
+ return AsmPrinter::emitFunctionEntryLabel();
}
// Emit an official procedure descriptor.
@@ -1412,61 +1387,56 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
MCSectionELF *Section = OutStreamer->getContext().getELFSection(
".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
- OutStreamer->EmitLabel(CurrentFnSym);
- OutStreamer->EmitValueToAlignment(8);
+ OutStreamer->emitLabel(CurrentFnSym);
+ OutStreamer->emitValueToAlignment(8);
MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
- OutStreamer->EmitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
+ OutStreamer->emitValue(MCSymbolRefExpr::create(Symbol1, OutContext),
8 /*size*/);
MCSymbol *Symbol2 = OutContext.getOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
- OutStreamer->EmitValue(
+ OutStreamer->emitValue(
MCSymbolRefExpr::create(Symbol2, MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
8/*size*/);
// Emit a null environment pointer.
- OutStreamer->EmitIntValue(0, 8 /* size */);
+ OutStreamer->emitIntValue(0, 8 /* size */);
OutStreamer->SwitchSection(Current.first, Current.second);
}
-bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
const DataLayout &DL = getDataLayout();
bool isPPC64 = DL.getPointerSizeInBits() == 64;
- PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (!TOC.empty()) {
- MCSectionELF *Section;
-
- if (isPPC64)
- Section = OutStreamer->getContext().getELFSection(
- ".toc", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- else
- Section = OutStreamer->getContext().getELFSection(
- ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ const char *Name = isPPC64 ? ".toc" : ".got2";
+ MCSectionELF *Section = OutContext.getELFSection(
+ Name, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
OutStreamer->SwitchSection(Section);
+ if (!isPPC64)
+ OutStreamer->emitValueToAlignment(4);
for (const auto &TOCMapPair : TOC) {
const MCSymbol *const TOCEntryTarget = TOCMapPair.first;
MCSymbol *const TOCEntryLabel = TOCMapPair.second;
- OutStreamer->EmitLabel(TOCEntryLabel);
- if (isPPC64) {
- TS.emitTCEntry(*TOCEntryTarget);
- } else {
- OutStreamer->EmitValueToAlignment(4);
- OutStreamer->EmitSymbolValue(TOCEntryTarget, 4);
- }
+ OutStreamer->emitLabel(TOCEntryLabel);
+ if (isPPC64 && TS != nullptr)
+ TS->emitTCEntry(*TOCEntryTarget);
+ else
+ OutStreamer->emitSymbolValue(TOCEntryTarget, 4);
}
}
- return AsmPrinter::doFinalization(M);
+ PPCAsmPrinter::emitEndOfAsmFile(M);
}
/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
-void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
// In the ELFv2 ABI, in functions that use the TOC register, we need to
// provide two entry points. The ABI guarantees that when calling the
// local entry point, r2 is set up by the caller to contain the TOC base
@@ -1498,16 +1468,23 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
- if (Subtarget->isELFv2ABI()
- // Only do all that if the function uses r2 in the first place.
- && !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) ||
+ !MF->getRegInfo().use_empty(PPC::R2);
+ const bool PCrelGEPRequired = Subtarget->isUsingPCRelativeCalls() &&
+ UsesX2OrR2 && PPCFI->usesTOCBasePtr();
+ const bool NonPCrelGEPRequired = !Subtarget->isUsingPCRelativeCalls() &&
+ Subtarget->isELFv2ABI() && UsesX2OrR2;
+
+ // Only do all that if the function uses R2 as the TOC pointer
+ // in the first place. We don't need the global entry point if the
+ // function uses R2 as an allocatable register.
+ if (NonPCrelGEPRequired || PCrelGEPRequired) {
// Note: The logic here must be synchronized with the code in the
// branch-selection pass which sets the offset of the first block in the
// function. This matters because it affects the alignment.
- const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
-
- MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
- OutStreamer->EmitLabel(GlobalEntryLabel);
+ MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol(*MF);
+ OutStreamer->emitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
@@ -1517,21 +1494,19 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
- const MCExpr *TOCDeltaHi =
- PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+ const MCExpr *TOCDeltaHi = PPCMCExpr::createHa(TOCDeltaExpr, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::X2)
.addReg(PPC::X12)
.addExpr(TOCDeltaHi));
- const MCExpr *TOCDeltaLo =
- PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+ const MCExpr *TOCDeltaLo = PPCMCExpr::createLo(TOCDeltaExpr, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
.addReg(PPC::X2)
.addReg(PPC::X2)
.addExpr(TOCDeltaLo));
} else {
- MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol();
+ MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol(*MF);
const MCExpr *TOCOffsetDeltaExpr =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext),
GlobalEntryLabelExp, OutContext);
@@ -1546,8 +1521,8 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
.addReg(PPC::X12));
}
- MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol();
- OutStreamer->EmitLabel(LocalEntryLabel);
+ MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol(*MF);
+ OutStreamer->emitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
@@ -1559,13 +1534,43 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
if (TS)
TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
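+    // For illustration (label names are hypothetical), this produces a
+    // directive such as ".localentry foo, .Lfunc_lep0-.Lfunc_gep0", which
+    // records the distance between the two entry points in st_other.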
+ } else if (Subtarget->isUsingPCRelativeCalls()) {
+ // When generating the entry point for a function we have a few scenarios
+ // based on whether or not that function uses R2 and whether or not that
+ // function makes calls (or is a leaf function).
+ // 1) A leaf function that does not use R2 (or treats it as callee-saved
+ // and preserves it). In this case st_other=0 and both
+ // the local and global entry points for the function are the same.
+ // No special entry point code is required.
+ // 2) A function uses the TOC pointer R2. This function may or may not have
+ // calls. In this case st_other=[2,6] and the global and local entry
+  //    points are different. Code to correctly set up the TOC pointer in R2
+  //    is put between the global and local entry points. This case is
+  //    covered by the if statement above.
+ // 3) A function does not use the TOC pointer R2 but does have calls.
+ // In this case st_other=1 since we do not know whether or not any
+ // of the callees clobber R2. This case is dealt with in this else if
+  //    block. Tail calls are considered calls, so st_other should be set to
+  //    1 in that case as well.
+ // 4) The function does not use the TOC pointer but R2 is used inside
+ // the function. In this case st_other=1 once again.
+  // 5) The function uses inline asm. We mark R2 as reserved if the function
+  //    has inline asm, since we have to assume that R2 may be used.
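+  // For cases 3-5 the block below emits, e.g. (function name hypothetical),
+  // ".localentry callee, 1", i.e. st_other=1, signalling that callers cannot
+  // assume R2 is preserved across a call to this function.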
+ if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() ||
+ MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (TS)
+ TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
+ MCConstantExpr::create(1, OutContext));
+ }
}
}
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
-void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+void PPCLinuxAsmPrinter::emitFunctionBodyEnd() {
// Only the 64-bit target requires a traceback table. For now,
// we only emit the word of zeroes that GDB requires to find
// the end of the function, and zeroes for the eight-byte
@@ -1574,19 +1579,73 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
// the PPC64 ELF ABI (this is a low-priority item because GDB does not
// currently make use of these fields).
if (Subtarget->isPPC64()) {
- OutStreamer->EmitIntValue(0, 4/*size*/);
- OutStreamer->EmitIntValue(0, 8/*size*/);
+ OutStreamer->emitIntValue(0, 4/*size*/);
+ OutStreamer->emitIntValue(0, 8/*size*/);
+ }
+}
+
+void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
+ MCSymbol *GVSym) const {
+
+ assert(MAI->hasVisibilityOnlyWithLinkage() &&
+ "AIX's linkage directives take a visibility setting.");
+
+ MCSymbolAttr LinkageAttr = MCSA_Invalid;
+ switch (GV->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ LinkageAttr = GV->isDeclaration() ? MCSA_Extern : MCSA_Global;
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ LinkageAttr = MCSA_Weak;
+ break;
+ case GlobalValue::AvailableExternallyLinkage:
+ LinkageAttr = MCSA_Extern;
+ break;
+ case GlobalValue::PrivateLinkage:
+ return;
+ case GlobalValue::InternalLinkage:
+ assert(GV->getVisibility() == GlobalValue::DefaultVisibility &&
+ "InternalLinkage should not have other visibility setting.");
+ LinkageAttr = MCSA_LGlobal;
+ break;
+ case GlobalValue::AppendingLinkage:
+ llvm_unreachable("Should never emit this");
+ case GlobalValue::CommonLinkage:
+ llvm_unreachable("CommonLinkage of XCOFF should not come to this path");
}
+
+  assert(LinkageAttr != MCSA_Invalid && "LinkageAttr should not be MCSA_Invalid.");
+
+ MCSymbolAttr VisibilityAttr = MCSA_Invalid;
+ switch (GV->getVisibility()) {
+
+ // TODO: "exported" and "internal" Visibility needs to go here.
+ case GlobalValue::DefaultVisibility:
+ break;
+ case GlobalValue::HiddenVisibility:
+ VisibilityAttr = MAI->getHiddenVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ VisibilityAttr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
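+  // For illustration (symbol name hypothetical, syntax assumed): on the
+  // assembly path a weak, hidden definition lowers to a single directive
+  // such as ".weak foo,hidden".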
+ OutStreamer->emitXCOFFSymbolLinkageWithVisibility(GVSym, LinkageAttr,
+ VisibilityAttr);
}
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
- // Get the function descriptor symbol.
- CurrentFnDescSym = getSymbol(&MF.getFunction());
- // Set the containing csect.
- MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection(
- CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData());
- cast<MCSymbolXCOFF>(CurrentFnDescSym)->setContainingCsect(FnDescSec);
+  // Set up CurrentFnDescSym and its containing csect.
+ MCSectionXCOFF *FnDescSec =
+ cast<MCSectionXCOFF>(getObjFileLowering().getSectionForFunctionDescriptor(
+ &MF.getFunction(), TM));
+ FnDescSec->setAlignment(Align(Subtarget->isPPC64() ? 8 : 4));
+
+ CurrentFnDescSym = FnDescSec->getQualNameSymbol();
return AsmPrinter::SetupMachineFunction(MF);
}
@@ -1603,31 +1662,20 @@ void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) {
report_fatal_error("COMDAT not yet supported by AIX.");
}
-const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) {
- if (const Function *F = dyn_cast<Function>(CV)) {
- MCSymbolXCOFF *FSym = cast<MCSymbolXCOFF>(getSymbol(F));
- if (!FSym->hasContainingCsect()) {
- const XCOFF::StorageClass SC =
- F->isDeclaration()
- ? TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F)
- : XCOFF::C_HIDEXT;
- MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
- FSym->getName(), XCOFF::XMC_DS,
- F->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, SC,
- SectionKind::getData());
- FSym->setContainingCsect(Csect);
- }
- return MCSymbolRefExpr::create(
- FSym->getContainingCsect()->getQualNameSymbol(), OutContext);
- }
- return PPCAsmPrinter::lowerConstant(CV);
+static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) {
+ return StringSwitch<bool>(GV->getName())
+ .Cases("llvm.global_ctors", "llvm.global_dtors", true)
+ .Default(false);
}
-void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
ValidateGV(GV);
- // External global variables are already handled.
- if (!GV->hasInitializer())
+  // TODO: Update the handling of global arrays for static init when we
+  // support the ".ref" directive. Until then, we can skip these arrays,
+  // because the AIX linker collects static init functions simply based on
+  // their name.
+ if (isSpecialLLVMGlobalArrayForStaticInit(GV))
return;
// Create the symbol, set its storage class.
@@ -1635,156 +1683,133 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
GVSym->setStorageClass(
TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
+ if (GV->isDeclarationForLinker()) {
+ emitLinkage(GV, GVSym);
+ return;
+ }
+
SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM);
- if ((!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) ||
- GVKind.isMergeable2ByteCString() || GVKind.isMergeable4ByteCString())
+ if (!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly())
report_fatal_error("Encountered a global variable kind that is "
"not supported yet.");
- // Create the containing csect and switch to it.
MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
+
+ // Switch to the containing csect.
OutStreamer->SwitchSection(Csect);
- GVSym->setContainingCsect(Csect);
const DataLayout &DL = GV->getParent()->getDataLayout();
// Handle common symbols.
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
- unsigned Align =
- GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV);
+ Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
if (GVKind.isBSSLocal())
- OutStreamer->EmitXCOFFLocalCommonSymbol(
- GVSym, Size, Csect->getQualNameSymbol(), Align);
+ OutStreamer->emitXCOFFLocalCommonSymbol(
+ OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size,
+ GVSym, Alignment.value());
else
- OutStreamer->EmitCommonSymbol(Csect->getQualNameSymbol(), Size, Align);
+ OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value());
return;
}
MCSymbol *EmittedInitSym = GVSym;
- EmitLinkage(GV, EmittedInitSym);
- EmitAlignment(getGVAlignment(GV, DL), GV);
- OutStreamer->EmitLabel(EmittedInitSym);
- EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+ emitLinkage(GV, EmittedInitSym);
+ emitAlignment(getGVAlignment(GV, DL), GV);
+ OutStreamer->emitLabel(EmittedInitSym);
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
}
-void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
+void PPCAIXAsmPrinter::emitFunctionDescriptor() {
const DataLayout &DL = getDataLayout();
const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4;
MCSectionSubPair Current = OutStreamer->getCurrentSection();
// Emit function descriptor.
OutStreamer->SwitchSection(
- cast<MCSymbolXCOFF>(CurrentFnDescSym)->getContainingCsect());
- OutStreamer->EmitLabel(CurrentFnDescSym);
+ cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
// Emit function entry point address.
- OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
+ OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection(
- StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
- SectionKind::getData());
- const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol();
- OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
+ const MCSymbol *TOCBaseSym =
+ cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
+ OutStreamer->emitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
- OutStreamer->EmitIntValue(0, PointerSize);
+ OutStreamer->emitIntValue(0, PointerSize);
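+  // The resulting descriptor mirrors the ELF one above: { entry point,
+  // TOC base, 0 }, with pointer-sized (4- or 8-byte) slots.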
OutStreamer->SwitchSection(Current.first, Current.second);
}
-void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// If there are no functions in this module, we will never need to reference
// the TOC base.
if (M.empty())
return;
- // Emit TOC base.
- MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
- StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
- SectionKind::getData());
- // The TOC-base always has 0 size, but 4 byte alignment.
- TOCBaseSection->setAlignment(Align(4));
// Switch to section to emit TOC base.
- OutStreamer->SwitchSection(TOCBaseSection);
+ OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection());
- PPCTargetStreamer &TS =
- static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ const unsigned EntryByteSize = Subtarget->isPPC64() ? 8 : 4;
+ const unsigned TOCEntriesByteSize = TOC.size() * EntryByteSize;
+  // TODO: If the TOC entries' combined size is larger than 32767 bytes, we
+  // run out of positive displacement to reach a TOC entry. We need to decide
+  // how to handle TOCs larger than that later.
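+  // For illustration: with 8-byte entries on 64-bit AIX, the limit allows at
+  // most 4095 entries (4095 * 8 = 32760 <= 32767).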
+ if (TOCEntriesByteSize > 32767) {
+ report_fatal_error("Handling of TOC entry displacement larger than 32767 "
+ "is not yet implemented.");
+ }
for (auto &I : TOC) {
// Setup the csect for the current TC entry.
- MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC,
- XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
- cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry);
+ MCSectionXCOFF *TCEntry = cast<MCSectionXCOFF>(
+ getObjFileLowering().getSectionForTOCEntry(I.first));
OutStreamer->SwitchSection(TCEntry);
- OutStreamer->EmitLabel(I.second);
- TS.emitTCEntry(*I.first);
+ OutStreamer->emitLabel(I.second);
+ if (TS != nullptr)
+ TS->emitTCEntry(*I.first);
}
}
-MCSymbol *
-PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
- const GlobalObject *GO = nullptr;
-
- // If the MO is a function or certain kind of globals, we want to make sure to
- // refer to the csect symbol, otherwise we can just do the default handling.
- if (MO.getType() != MachineOperand::MO_GlobalAddress ||
- !(GO = dyn_cast<const GlobalObject>(MO.getGlobal())))
- return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO);
-
- // Do an early error check for globals we don't support. This will go away
- // eventually.
- const auto *GV = dyn_cast<const GlobalVariable>(GO);
- if (GV) {
- ValidateGV(GV);
- }
+bool PPCAIXAsmPrinter::doInitialization(Module &M) {
+ if (M.alias_size() > 0u)
+ report_fatal_error(
+ "module has aliases, which LLVM does not yet support for AIX");
- MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO));
-
- // If the global object is a global variable without initializer or is a
- // declaration of a function, then XSym is an external referenced symbol.
- // Hence we may need to explictly create a MCSectionXCOFF for it so that we
- // can return its symbol later.
- if (GO->isDeclaration()) {
- if (!XSym->hasContainingCsect()) {
- // Make sure the storage class is set.
- const XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
- XSym->setStorageClass(SC);
-
- MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
- XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA,
- XCOFF::XTY_ER, SC, SectionKind::getMetadata());
- XSym->setContainingCsect(Csect);
- }
+ const bool Result = PPCAsmPrinter::doInitialization(M);
- return XSym->getContainingCsect()->getQualNameSymbol();
- }
+ auto setCsectAlignment = [this](const GlobalObject *GO) {
+    // Declarations have an alignment of 0, which is set by default.
+ if (GO->isDeclarationForLinker())
+ return;
- // Handle initialized global variables and defined functions.
- SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
-
- if (GOKind.isText()) {
- // If the MO is a function, we want to make sure to refer to the function
- // descriptor csect.
- return OutStreamer->getContext()
- .getXCOFFSection(XSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
- XCOFF::C_HIDEXT, SectionKind::getData())
- ->getQualNameSymbol();
- } else if (GOKind.isCommon() || GOKind.isBSSLocal()) {
- // If the operand is a common then we should refer to the csect symbol.
- return cast<MCSectionXCOFF>(
- getObjFileLowering().SectionForGlobal(GO, GOKind, TM))
- ->getQualNameSymbol();
- }
+ SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
+ MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ getObjFileLowering().SectionForGlobal(GO, GOKind, TM));
+
+ Align GOAlign = getGVAlignment(GO, GO->getParent()->getDataLayout());
+ if (GOAlign > Csect->getAlignment())
+ Csect->setAlignment(GOAlign);
+ };
+
+ // We need to know, up front, the alignment of csects for the assembly path,
+  // because once a .csect directive gets emitted, we cannot change the
+  // alignment value on it.
+ for (const auto &G : M.globals())
+ setCsectAlignment(&G);
+
+ for (const auto &F : M)
+ setCsectAlignment(&F);
- // Other global variables are refered to by labels inside of a single csect,
- // so refer to the label directly.
- return getSymbol(GV);
+ return Result;
}
/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
diff --git a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 104cf2ba3c004..2259a29f838ab 100644
--- a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -220,7 +220,7 @@ class PPCBoolRetToInt : public FunctionPass {
auto Defs = findAllDefs(U);
// If the values are all Constants or Arguments, don't bother
- if (llvm::none_of(Defs, isa<Instruction, Value *>))
+ if (llvm::none_of(Defs, [](Value *V) { return isa<Instruction>(V); }))
return false;
// Presently, we only know how to handle PHINode, Constant, Arguments and
diff --git a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 109b665e0d57c..50ae4450a8375 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -272,6 +272,11 @@ bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
return false;
}
+ if (Cand.BranchBlock->mayHaveInlineAsmBr()) {
+ LLVM_DEBUG(dbgs() << "Inline Asm Br - skip\n");
+ return false;
+ }
+
// For now only consider triangles (i.e, BranchTargetBlock is set,
// FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
if (!Cand.BranchTargetBlock || FalseMBB ||
diff --git a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index cdff4d383d236..47b9e97f0d67f 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -31,6 +31,9 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+STATISTIC(NumPrefixed, "Number of prefixed instructions");
+STATISTIC(NumPrefixedAligned,
+ "Number of prefixed instructions that have been aligned");
namespace {
struct PPCBSel : public MachineFunctionPass {
@@ -82,7 +85,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
unsigned Offset) {
const Align Alignment = MBB.getAlignment();
- if (Alignment == Align::None())
+ if (Alignment == Align(1))
return 0;
const Align ParentAlign = MBB.getParent()->getAlignment();
@@ -134,10 +137,38 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
}
unsigned BlockSize = 0;
+ unsigned UnalignedBytesRemaining = 0;
for (MachineInstr &MI : *MBB) {
- BlockSize += TII->getInstSizeInBytes(MI);
+ unsigned MINumBytes = TII->getInstSizeInBytes(MI);
if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
FirstImpreciseBlock = MBB->getNumber();
+ if (TII->isPrefixed(MI.getOpcode())) {
+ NumPrefixed++;
+
+      // All 8 byte instructions may require alignment. Each 8 byte
+      // instruction may need an extra 4 bytes of padding to align it.
+      // This means that an 8 byte instruction may require 12 bytes
+      // (8 for the instruction itself and 4 for the alignment nop).
+ // This will happen if an 8 byte instruction can be aligned to 64 bytes
+ // by only adding a 4 byte nop.
+ // We don't know the alignment at this point in the code so we have to
+ // adopt a more pessimistic approach. If an instruction may need
+ // alignment we assume that it does need alignment and add 4 bytes to
+ // it. As a result we may end up with more long branches than before
+ // but we are in the safe position where if we need a long branch we
+ // have one.
+ // The if statement checks to make sure that two 8 byte instructions
+ // are at least 64 bytes away from each other. It is not possible for
+ // two instructions that both need alignment to be within 64 bytes of
+ // each other.
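+      // Worked example: a prefixed instruction is therefore budgeted at 12
+      // bytes (8 + 4), and no further alignment padding is assumed for the
+      // following 60 bytes of instructions.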
+ if (!UnalignedBytesRemaining) {
+ BlockSize += 4;
+ UnalignedBytesRemaining = 60;
+ NumPrefixedAligned++;
+ }
+ }
+ UnalignedBytesRemaining -= std::min(UnalignedBytesRemaining, MINumBytes);
+ BlockSize += MINumBytes;
}
BlockSizes[MBB->getNumber()].first = BlockSize;
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4ce705300e1b5..bb12e05173a68 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -46,7 +46,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 369b9ce1a711e..1eaa7f7a44b39 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -283,15 +283,6 @@ def CC_PPC32_SVR4_ByVal : CallingConv<[
def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
-def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28,
- R29, R30, R31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
-
// SPE does not use FPRs, so break out the common register set as base.
def CSR_SVR432_COMM : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27,
@@ -316,45 +307,20 @@ def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
- X21, X22, X23, X24, X25, X26, X27, X28,
- X29, X30, X31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
-
-def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+// Common CalleeSavedRegs for SVR4 and AIX.
+def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
-def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
- X21, X22, X23, X24, X25, X26, X27, X28,
- X29, X30, X31, F14, F15, F16, F17, F18,
- F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4
- )>;
-
-// CSRs that are handled by prologue, epilogue.
-def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
-
-def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
-
-def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
-
-def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
-
-def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
-def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+def CSR_PPC64_Altivec : CalleeSavedRegs<(add CSR_PPC64, CSR_Altivec)>;
-def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+def CSR_PPC64_R2 : CalleeSavedRegs<(add CSR_PPC64, X2)>;
-def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+def CSR_PPC64_R2_Altivec : CalleeSavedRegs<(add CSR_PPC64_Altivec, X2)>;
def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index aa5d830b549ed..c9f74bbf861ca 100644
--- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -90,8 +90,8 @@ protected:
// This is a conditional branch to the return. Replace the branch
// with a bclr.
BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg())
+ .add(J->getOperand(0))
+ .add(J->getOperand(1))
.copyImplicitOps(*I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -106,7 +106,7 @@ protected:
BuildMI(
**PI, J, J->getDebugLoc(),
TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg())
+ .add(J->getOperand(0))
.copyImplicitOps(*I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
diff --git a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
index e8ef451c7ec96..4c74e82cf0412 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -381,21 +381,10 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
MBB->end());
NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB);
- // Copy the original liveIns of MBB to NewSuccessor.
- for (auto &LI : MBB->liveins())
- NewSuccessor->addLiveIn(LI);
-
- // After splitting the NewSuccessor block, Regs defined but not killed
- // in MBB should be treated as liveins of NewSuccessor.
- // Note: Cannot use stepBackward instead since we are using the Reg
- // liveness state at the end of MBB (liveOut of MBB) as the liveIn for
- // NewSuccessor. Otherwise, will cause cyclic dependence.
- LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
- SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 2> Clobbers;
- for (MachineInstr &MI : *MBB)
- LPR.stepForward(MI, Clobbers);
- for (auto &LI : LPR)
- NewSuccessor->addLiveIn(LI);
+ // Update the liveins for NewSuccessor.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *NewSuccessor);
+
} else {
// Remove successor from MBB.
MBB->removeSuccessor(Successor);
@@ -441,44 +430,26 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
// condition is true
MachineOperand &FalseValue = MI->getOperand(2); // Value to store if
// condition is false
- MachineOperand &ConditionRegister = MI->getOperand(3); // Condition
LLVM_DEBUG(dbgs() << "Dest: " << Dest << "\n");
LLVM_DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n");
LLVM_DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n");
- LLVM_DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n");
+ LLVM_DEBUG(dbgs() << "ConditionRegister: " << MI->getOperand(3) << "\n");
// If the Dest Register and True Value Register are not the same one, we
// need the True Block.
bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
- if (IsADDIInstRequired) {
- // Copy the result into the destination if the condition is true.
+ // Copy the result into the destination if the condition is true.
+ if (IsADDIInstRequired)
BuildMI(*TrueBlock, TrueBlockI, dl,
TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI))
.add(Dest)
.add(TrueValue)
.add(MachineOperand::CreateImm(0));
- // Add the LiveIn registers required by true block.
- TrueBlock->addLiveIn(TrueValue.getReg());
- }
-
- if (IsORIInstRequired) {
- // Add the LiveIn registers required by false block.
- FalseBlock->addLiveIn(FalseValue.getReg());
- }
-
- if (NewSuccessor) {
- // Add the LiveIn registers required by NewSuccessor block.
- NewSuccessor->addLiveIn(Dest.getReg());
- NewSuccessor->addLiveIn(TrueValue.getReg());
- NewSuccessor->addLiveIn(FalseValue.getReg());
- NewSuccessor->addLiveIn(ConditionRegister.getReg());
- }
-
- // Copy the value into the destination if the condition is false.
+ // Copy the result into the destination if the condition is false.
if (IsORIInstRequired)
BuildMI(*FalseBlock, FalseBlockI, dl,
TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI))
@@ -490,6 +461,18 @@ void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
NumExpanded++;
}
+
+ if (IsTrueBlockRequired) {
+ // Update the liveins for TrueBlock.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *TrueBlock);
+ }
+
+ if (IsFalseBlockRequired) {
+ // Update the liveins for FalseBlock.
+ LivePhysRegs LPR;
+ computeAndAddLiveIns(LPR, *FalseBlock);
+ }
}
void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) {
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index d8425d89da92e..39790ac9a8aab 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -87,6 +87,7 @@ class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const PPCSubtarget *PPCSubTarget;
+ const PPCSubtarget *Subtarget;
PPCFunctionInfo *PPCFuncInfo;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
@@ -97,12 +98,12 @@ class PPCFastISel final : public FastISel {
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
+ Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
- TII(*PPCSubTarget->getInstrInfo()),
- TLI(*PPCSubTarget->getTargetLowering()),
+ TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
Context(&FuncInfo.Fn->getContext()) {}
- // Backend specific FastISel code.
+ // Backend specific FastISel code.
private:
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
@@ -456,7 +457,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
bool IsZExt, unsigned FP64LoadOpc) {
unsigned Opc;
bool UseOffset = true;
- bool HasSPE = PPCSubTarget->hasSPE();
+ bool HasSPE = Subtarget->hasSPE();
// If ResultReg is given, it determines the register class of the load.
// Otherwise, RC is the register class to use. If the result of the
@@ -498,7 +499,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
- Opc = PPCSubTarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
+ Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
break;
case MVT::f64:
Opc = FP64LoadOpc;
@@ -536,7 +537,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
- MFI.getObjectAlignment(Addr.Base.FI));
+ MFI.getObjectAlign(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
@@ -614,7 +615,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
Register ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
- PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
+ Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
updateValueMap(I, ResultReg);
return true;
@@ -647,10 +648,10 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
UseOffset = ((Addr.Offset & 3) == 0);
break;
case MVT::f32:
- Opc = PPCSubTarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
+ Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
break;
case MVT::f64:
- Opc = PPCSubTarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
+ Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
break;
}
@@ -682,7 +683,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
- MFI.getObjectAlignment(Addr.Base.FI));
+ MFI.getObjectAlign(Addr.Base.FI));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
@@ -794,8 +795,9 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
return false;
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
- .addImm(PPCSubTarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
- .addReg(CondReg).addMBB(TBB);
+ .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
+ .addReg(CondReg)
+ .addMBB(TBB);
finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -827,7 +829,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
+ if (SrcVT == MVT::i1 && Subtarget->useCRBits())
return false;
// See if operand 2 is an immediate encodeable in the compare.
@@ -836,7 +838,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
// similar to ARM in this regard.
long Imm = 0;
bool UseImm = false;
- const bool HasSPE = PPCSubTarget->hasSPE();
+ const bool HasSPE = Subtarget->hasSPE();
// Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
@@ -988,7 +990,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
@@ -1030,7 +1032,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
- Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+ Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
// Store the value from the GPR.
if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
@@ -1043,10 +1045,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (SrcVT == MVT::i32) {
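+    // For illustration: the value was stored above as an 8-byte doubleword,
+    // so on big-endian targets the 32-bit word to reload sits at offset 4;
+    // on little-endian targets it sits at offset 0.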
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
- } else if (PPCSubTarget->hasLFIWAX()) {
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
+ } else if (Subtarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
}
}
@@ -1086,7 +1088,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
return false;
// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
unsigned Opc;
if (DstVT == MVT::f32)
Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
@@ -1103,7 +1105,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
- if (!IsSigned && !PPCSubTarget->hasFPCVT())
+ if (!IsSigned && !Subtarget->hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
@@ -1111,7 +1113,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
- if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
+ if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
return false;
// Extend the input if necessary.
@@ -1159,7 +1161,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// easiest code gen possible.
Address Addr;
Addr.BaseType = Address::FrameIndexBase;
- Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+ Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
// Store the value from the FPR.
if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
@@ -1168,7 +1170,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// Reload it into a GPR. If we want an i32 on big endian, modify the
// address to have a 4-byte offset so we load from the right place.
if (VT == MVT::i32)
- Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
+ Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
// Look at the currently assigned register for this instruction
// to determine the required register class.
@@ -1196,8 +1198,8 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
return false;
// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
- if (DstVT == MVT::i64 && !IsSigned &&
- !PPCSubTarget->hasFPCVT() && !PPCSubTarget->hasSPE())
+ if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
+ !Subtarget->hasSPE())
return false;
Value *Src = I->getOperand(0);
@@ -1226,7 +1228,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
unsigned Opc;
auto RC = MRI.getRegClass(SrcReg);
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
if (IsSigned)
Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
@@ -1234,7 +1236,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
- if (DstVT == MVT::i32)
+ if (DstVT == MVT::i32)
Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
else
Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
@@ -1244,7 +1246,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IsSigned)
Opc = PPC::FCTIWZ;
else
- Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
}
@@ -1254,8 +1256,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
.addReg(SrcReg);
// Now move the integer value from a float register to an integer register.
- unsigned IntReg = PPCSubTarget->hasSPE() ? DestReg :
- PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ unsigned IntReg = Subtarget->hasSPE()
+ ? DestReg
+ : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
if (IntReg == 0)
return false;
@@ -1383,8 +1386,8 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
- CCInfo.AllocateStack(LinkageSize, 8);
+ unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
+ CCInfo.AllocateStack(LinkageSize, Align(8));
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1573,7 +1576,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
- else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ else if (RetVT == MVT::i1 && Subtarget->useCRBits())
// We can't handle boolean returns when CR bits are in use.
return false;
@@ -1688,9 +1691,6 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- if (TLI.supportSplitCSR(FuncInfo.MF))
- return false;
-
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
@@ -1996,10 +1996,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
return 0;
// All FP constants are loaded from the constant pool.
- unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
- assert(Align > 0 && "Unexpectedly missing alignment information!");
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
- const bool HasSPE = PPCSubTarget->hasSPE();
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
+ const bool HasSPE = Subtarget->hasSPE();
const TargetRegisterClass *RC;
if (HasSPE)
RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
@@ -2011,7 +2010,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
- MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
+ MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
unsigned Opc;
@@ -2093,7 +2092,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- if (PPCSubTarget->isGVIndirectSymbol(GV)) {
+ if (Subtarget->isGVIndirectSymbol(GV)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
} else {
@@ -2200,7 +2199,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ if (VT == MVT::i1 && Subtarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
@@ -2355,7 +2354,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
Register ResultReg = MI->getOperand(0).getReg();
if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
- PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
+ Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
return false;
MachineBasicBlock::iterator I(MI);
@@ -2382,7 +2381,7 @@ unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ if (VT == MVT::i1 && Subtarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 4c608520e2658..bd9174c1973dc 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -31,6 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
+STATISTIC(NumPrologProbed, "Number of prologues probed");
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
@@ -47,7 +49,7 @@ static const MCPhysReg VRRegNo[] = {
};
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI() || STI.isAIXABI())
+ if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
// SVR4 ABI:
return STI.isPPC64() ? 16 : 4;
@@ -60,20 +62,12 @@ static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
- // For the Darwin ABI:
- // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
- // for saving the frame pointer (if needed.) While the published ABI has
- // not used this slot since at least MacOSX 10.2, there is older code
- // around that does use it, and that needs to continue to work.
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -8U : -4U;
-
- // SVR4 ABI: First slot in the general register save area.
+ // First slot in the general register save area.
return STI.isPPC64() ? -8U : -4U;
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
- if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
+ if (STI.isAIXABI() || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
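+  // For illustration, this evaluates to: 32 bytes for 64-bit ELFv2, 48 bytes
+  // for 64-bit ELFv1 and 64-bit AIX, and 24 bytes for 32-bit AIX.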
// 32-bit SVR4 ABI:
@@ -81,18 +75,16 @@ static unsigned computeLinkageSize(const PPCSubtarget &STI) {
}
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
- if (STI.isDarwinABI())
- return STI.isPPC64() ? -16U : -8U;
+ // Third slot in the general purpose register save area.
+ if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
+ return -12U;
- // SVR4 ABI: First slot in the general register save area.
- return STI.isPPC64()
- ? -16U
- : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
+ // Second slot in the general purpose register save area.
+ return STI.isPPC64() ? -16U : -8U;
}
-static unsigned computeCRSaveOffset() {
- // The condition register save offset needs to be updated for AIX PPC32.
- return 8;
+static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
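+  // The 32-bit AIX linkage area keeps the CR save slot at offset 4 (LR at 8);
+  // the 64-bit linkage areas keep it at offset 8 (LR at 16).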
+ return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
@@ -103,71 +95,97 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
LinkageSize(computeLinkageSize(Subtarget)),
BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
- CRSaveOffset(computeCRSaveOffset()) {}
+ CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
unsigned &NumEntries) const {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
+// Floating-point register save area offsets.
+#define CALLEE_SAVED_FPRS \
+ {PPC::F31, -8}, \
+ {PPC::F30, -16}, \
+ {PPC::F29, -24}, \
+ {PPC::F28, -32}, \
+ {PPC::F27, -40}, \
+ {PPC::F26, -48}, \
+ {PPC::F25, -56}, \
+ {PPC::F24, -64}, \
+ {PPC::F23, -72}, \
+ {PPC::F22, -80}, \
+ {PPC::F21, -88}, \
+ {PPC::F20, -96}, \
+ {PPC::F19, -104}, \
+ {PPC::F18, -112}, \
+ {PPC::F17, -120}, \
+ {PPC::F16, -128}, \
+ {PPC::F15, -136}, \
+ {PPC::F14, -144}
+
+// 32-bit general purpose register save area offsets shared by ELF and
+// AIX. AIX has an extra CSR with r13.
+#define CALLEE_SAVED_GPRS32 \
+ {PPC::R31, -4}, \
+ {PPC::R30, -8}, \
+ {PPC::R29, -12}, \
+ {PPC::R28, -16}, \
+ {PPC::R27, -20}, \
+ {PPC::R26, -24}, \
+ {PPC::R25, -28}, \
+ {PPC::R24, -32}, \
+ {PPC::R23, -36}, \
+ {PPC::R22, -40}, \
+ {PPC::R21, -44}, \
+ {PPC::R20, -48}, \
+ {PPC::R19, -52}, \
+ {PPC::R18, -56}, \
+ {PPC::R17, -60}, \
+ {PPC::R16, -64}, \
+ {PPC::R15, -68}, \
+ {PPC::R14, -72}
+
+// 64-bit general purpose register save area offsets.
+#define CALLEE_SAVED_GPRS64 \
+ {PPC::X31, -8}, \
+ {PPC::X30, -16}, \
+ {PPC::X29, -24}, \
+ {PPC::X28, -32}, \
+ {PPC::X27, -40}, \
+ {PPC::X26, -48}, \
+ {PPC::X25, -56}, \
+ {PPC::X24, -64}, \
+ {PPC::X23, -72}, \
+ {PPC::X22, -80}, \
+ {PPC::X21, -88}, \
+ {PPC::X20, -96}, \
+ {PPC::X19, -104}, \
+ {PPC::X18, -112}, \
+ {PPC::X17, -120}, \
+ {PPC::X16, -128}, \
+ {PPC::X15, -136}, \
+ {PPC::X14, -144}
+
+// Vector register save area offsets.
+#define CALLEE_SAVED_VRS \
+ {PPC::V31, -16}, \
+ {PPC::V30, -32}, \
+ {PPC::V29, -48}, \
+ {PPC::V28, -64}, \
+ {PPC::V27, -80}, \
+ {PPC::V26, -96}, \
+ {PPC::V25, -112}, \
+ {PPC::V24, -128}, \
+ {PPC::V23, -144}, \
+ {PPC::V22, -160}, \
+ {PPC::V21, -176}, \
+ {PPC::V20, -192}
// Note that the offsets here overlap, but this is fixed up in
// processFunctionBeforeFrameFinalized.
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
+ static const SpillSlot ELFOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
// CR save area offset. We map each of the nonvolatile CR fields
// to the slot for CR2, which is the first of the nonvolatile CR
@@ -178,19 +196,7 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192},
+ CALLEE_SAVED_VRS,
// SPE register save area (overlaps Vector save area).
{PPC::S31, -8},
@@ -212,73 +218,48 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
{PPC::S15, -136},
{PPC::S14, -144}};
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
+ static const SpillSlot ELFOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
// VRSAVE save area offset.
{PPC::VRSAVE, -4},
+ CALLEE_SAVED_VRS
+ };
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}};
+ static const SpillSlot AIXOffsets32[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ // TODO: Update when we add vector support for AIX.
+ };
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
+ static const SpillSlot AIXOffsets64[] = {
+ CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS64,
+ // TODO: Update when we add vector support for AIX.
+ };
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
+ if (Subtarget.is64BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets64);
+ return ELFOffsets64;
+ }
- return Offsets;
+ if (Subtarget.is32BitELFABI()) {
+ NumEntries = array_lengthof(ELFOffsets32);
+ return ELFOffsets32;
}
+
+ assert(Subtarget.isAIXABI() && "Unexpected ABI.");
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(AIXOffsets64);
+ return AIXOffsets64;
+ }
+
+ NumEntries = array_lengthof(AIXOffsets32);
+ return AIXOffsets32;
}
/// RemoveVRSaveCode - We have found that this function does not need any code
@@ -479,9 +460,9 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
- unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
- unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
- unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+ Align TargetAlign = getStackAlign(); // alignment required per the ABI
+  Align MaxAlign = MFI.getMaxAlign(); // alignment required by data in frame
+ Align Alignment = std::max(TargetAlign, MaxAlign);
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -513,7 +494,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI.hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+ maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
// Update the new max call frame size if the caller passes in a valid pointer.
if (NewMaxCallFrameSize)
@@ -523,7 +504,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
FrameSize += maxCallFrameSize;
// Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ FrameSize = alignTo(FrameSize, Alignment);
return FrameSize;
}
@@ -613,11 +594,11 @@ bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired,
- unsigned *SR1,
- unsigned *SR2) const {
+ Register *SR1,
+ Register *SR2) const {
RegScavenger RS;
- unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
- unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
+ Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+ Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
// Set the defaults for the two scratch registers.
if (SR1)
@@ -684,7 +665,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
if (SecondScratchReg != -1)
*SR2 = SecondScratchReg;
else
- *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
+ *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
}
// Now that we've done our best to provide both registers, double check
@@ -709,7 +690,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
@@ -778,11 +759,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsCFI = MF.needsFrameMoves();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -790,8 +773,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
- "Unsupported PPC ABI.");
+ assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
@@ -821,20 +803,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
bool MustSaveTOC = FI->mustSaveTOC();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = isPPC64 || !isSVR4ABI;
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
- unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
: PPC::MFLR );
@@ -854,6 +836,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
: PPC::SUBFC);
const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
: PPC::SUBFIC);
+ const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
+ : PPC::MFCR);
+ const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
// Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
// LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
@@ -863,9 +848,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- bool SingleScratchReg =
- findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
- &ScratchReg, &TempReg);
+  // Stack probing requires two scratch registers: one for the old SP, and
+  // one for large frames and large probe sizes.
+ bool SingleScratchReg = findScratchRegister(
+ &MBB, false,
+ twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
+ &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -906,21 +894,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
// Get stack alignments.
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
if (HasBP && MaxAlign > 1)
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
// Frames of 32KB & larger require special handling because they cannot be
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
bool isLargeFrame = !isInt<16>(NegFrameSize);
- assert((isPPC64 || !MustSaveCR) &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveCR && isAIXABI)
- report_fatal_error("Prologue CR saving is unimplemented on AIX.");
-
// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
@@ -960,49 +941,42 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // If we need to spill the CR and the LR but we don't have two separate
- // registers available, we must spill them one at a time
- if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ // Where in the prologue we move the CR fields depends on how many scratch
+ // registers we have, and if we need to save the link register or not. This
+ // lambda is to avoid duplicating the logic in 2 places.
+ auto BuildMoveFromCR = [&]() {
+ if (isELFv2ABI && MustSaveCRs.size() == 1) {
// In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
+ // If only one CR field is clobbered, it is more efficient to use
+ // mfocrf to selectively save just that field, because mfocrf has short
   // latency compared to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
+ assert(isPPC64 && "V2 ABI is 64-bit only.");
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
+ MIB.addReg(MustSaveCRs[0], RegState::Kill);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
+ for (unsigned CRfield : MustSaveCRs)
+ MIB.addReg(CRfield, RegState::ImplicitKill);
}
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
- .addReg(SPReg);
+ };
+
+ // If we need to spill the CR and the LR but we don't have two separate
+ // registers available, we must spill them one at a time
+ if (MustSaveCR && SingleScratchReg && MustSaveLR) {
+ BuildMoveFromCR();
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(CRSaveOffset)
+ .addReg(SPReg);
}
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
- if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
- // In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it is more efficient to use
- // mfocrf to selectively save just those fields, because mfocrf has short
- // latency compares to mfcr.
- unsigned MfcrOpcode = PPC::MFCR8;
- unsigned CrState = RegState::ImplicitKill;
- if (isELFv2ABI && MustSaveCRs.size() == 1) {
- MfcrOpcode = PPC::MFOCRF8;
- CrState = RegState::Kill;
- }
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], CrState);
- }
+ if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
+ BuildMoveFromCR();
if (HasRedZone) {
if (HasFP)
@@ -1029,11 +1003,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg);
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR)) {
assert(HasRedZone && "A red zone is always available on PPC64");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ BuildMI(MBB, MBBI, dl, StoreWordInst)
.addReg(TempReg, getKillRegState(true))
- .addImm(getCRSaveOffset())
+ .addImm(CRSaveOffset)
.addReg(SPReg);
}
@@ -1055,58 +1029,81 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// the negated frame size will be placed in ScratchReg.
bool HasSTUX = false;
- // This condition must be kept in sync with canUseAsPrologue.
- if (HasBP && MaxAlign > 1) {
- if (isPPC64)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- else // PPC32...
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(SPReg)
- .addImm(0)
- .addImm(32 - Log2_32(MaxAlign))
- .addImm(31);
- if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
+  // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the
+  // PowerPC ABI requires the backchain pointer to always be stored at SP, so
+  // the mandatory STU(X) instruction itself serves as the probe.
+ if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
+  // To be consistent with other targets, a pseudo instruction is emitted here
+  // and will later be expanded in `inlineStackProbe`.
+ BuildMI(MBB, MBBI, dl,
+ TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
+ : PPC::PROBED_STACKALLOC_32))
+ .addDef(ScratchReg)
+ .addDef(TempReg) // TempReg stores the old sp.
.addImm(NegFrameSize);
- } else {
- assert(!SingleScratchReg && "Only a single scratch reg available");
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
- .addReg(TempReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(TempReg, RegState::Kill);
+  // FIXME: HasSTUX is only read if HasRedZone is not set; in that case, we
+  // update ScratchReg to satisfy the assumption that ScratchReg contains
+  // the NegFrameSize. This solution is rather tricky.
+ if (!HasRedZone) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
+ .addReg(TempReg)
+ .addReg(SPReg);
+ HasSTUX = true;
}
+ } else {
+ // This condition must be kept in sync with canUseAsPrologue.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ assert(!SingleScratchReg && "Only a single scratch reg available");
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
- } else if (!isLargeFrame) {
- BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
- .addReg(SPReg)
- .addImm(NegFrameSize)
- .addReg(SPReg);
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
- .addReg(SPReg, RegState::Kill)
- .addReg(SPReg)
- .addReg(ScratchReg);
- HasSTUX = true;
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ HasSTUX = true;
+ }
}
// Save the TOC register after the stack pointer update if a prologue TOC
@@ -1247,7 +1244,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Adjust the definition of CFA to account for the change in SP.
assert(NegFrameSize);
CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
}
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1337,7 +1334,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -1367,6 +1364,175 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ // TODO: Generate CFI instructions.
+ bool isPPC64 = Subtarget.isPPC64();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ // AIX assembler does not support cfi directives.
+ const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
+ auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
+ int Opc = MI.getOpcode();
+ return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
+ });
+ if (StackAllocMIPos == PrologMBB.end())
+ return;
+ const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
+ MachineInstr &MI = *StackAllocMIPos;
+ int64_t NegFrameSize = MI.getOperand(2).getImm();
+ int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
+ int64_t NumBlocks = NegFrameSize / NegProbeSize;
+ int64_t NegResidualSize = NegFrameSize % NegProbeSize;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register ScratchReg = MI.getOperand(0).getReg();
+ Register FPReg = MI.getOperand(1).getReg();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ Align MaxAlign = MFI.getMaxAlign();
+ // Initialize current frame pointer.
+ const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
+ BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Subroutines to generate .cfi_* directives.
+ auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ auto buildDefCFA = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, Register Reg,
+ int Offset) {
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ };
+ // Subroutine to determine if we can use the Imm as part of d-form.
+ auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
+ // Subroutine to materialize the Imm into TempReg.
+ auto MaterializeImm = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t Imm,
+ Register &TempReg) {
+ assert(isInt<32>(Imm) && "Unhandled imm");
+ if (isInt<16>(Imm))
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
+ .addImm(Imm);
+ else {
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(Imm >> 16);
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
+ .addReg(TempReg)
+ .addImm(Imm & 0xFFFF);
+ }
+ };
+ // Subroutine to store frame pointer and decrease stack pointer by probe size.
+ auto allocateAndProbe = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int64_t NegSize,
+ Register NegSizeReg, bool UseDForm) {
+ if (UseDForm)
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
+ .addReg(FPReg)
+ .addImm(NegSize)
+ .addReg(SPReg);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(NegSizeReg);
+ };
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(PrologMBB, {MI}, FPReg, 0);
+  // For the case HasBP && MaxAlign > 1, we have to align SP by computing
+  // SP = SP - SP % MaxAlign.
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
+ BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(FPReg)
+ .addImm(0)
+ .addImm(32 - Log2(MaxAlign))
+ .addImm(31);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
+ SPReg)
+ .addReg(FPReg)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+  // If the number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(PrologMBB, {MI}, SPReg);
+ }
+ } else {
+  // Since CTR is a volatile register and the current shrink-wrapping
+  // implementation won't choose an MBB inside a loop as the PrologMBB, it's
+  // safe to synthesize a CTR loop here to do the probing.
+  // Calculate the trip count and store it in the CTR register.
+ MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(PrologMBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &PrologMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ PrologMBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
+ PrologMBB.addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+ }
+ ++NumPrologProbed;
+ MI.eraseFromParent();
+}
+
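As a concrete check of the arithmetic in inlineStackProbe above: C++ division and remainder truncate toward zero, so NumBlocks and NegResidualSize partition the frame exactly. A standalone sketch with illustrative values (canUseDForm mirrors the CanUseDForm lambda; everything else here is mine):

  #include <cassert>
  #include <cstdint>

  // Mirrors CanUseDForm: a d-form store-with-update takes a 16-bit signed
  // displacement that must be a multiple of 4.
  bool canUseDForm(int64_t Imm) {
    return Imm >= -32768 && Imm <= 32767 && Imm % 4 == 0;
  }

  int main() {
    int64_t NegFrameSize = -10000, NegProbeSize = -4096;
    // Truncation toward zero makes both results exact analogues of the
    // computation in inlineStackProbe.
    int64_t NumBlocks = NegFrameSize / NegProbeSize;       // 2 full probe blocks
    int64_t NegResidualSize = NegFrameSize % NegProbeSize; // -1808 residual bytes
    assert(NumBlocks == 2 && NegResidualSize == -1808);
    assert(canUseDForm(NegResidualSize)); // small multiple of 4: STDU/STWU works
    // NumBlocks < 3, so the prologue would emit the probes unrolled rather
    // than synthesizing a CTR loop.
  }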
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
@@ -1392,18 +1558,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
- const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
- unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register BPReg = RegInfo->getBaseRegister(MF);
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned ScratchReg = 0;
- unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ Register ScratchReg;
+ Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
: PPC::MTLR );
const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
@@ -1418,7 +1584,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
-
+ const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
+ : PPC::LWZ);
+ const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
+ : PPC::MTOCRF);
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@@ -1593,20 +1762,17 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// value (although not the base register). Make sure it is not overwritten
// too early.
- assert((isPPC64 || !MustSaveCR) &&
- "Epilogue CR restoring supported only in 64-bit mode");
-
// If we need to restore both the LR and the CR and we only have one
// available scratch register, we must do them one at a time.
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
// Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
// is live here.
assert(HasRedZone && "Expecting red zone");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
}
@@ -1623,11 +1789,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
- // This will only occur for PPC64.
- assert(isPPC64 && "Expecting 64-bit mode");
assert(RBReg == SPReg && "Should be using SP as a base register");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
- .addImm(getCRSaveOffset())
+ BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
+ .addImm(CRSaveOffset)
.addReg(RBReg);
}
@@ -1682,9 +1846,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
if (MustSaveCR &&
- !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
+ !(SingleScratchReg && MustSaveLR))
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
@@ -1729,13 +1893,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
DebugLoc dl = MBBI->getDebugLoc();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
- // Create branch instruction for pseudo tail call return instruction
+ // Create branch instruction for pseudo tail call return instruction.
+ // The TCRETURNdi variants are direct calls. Valid targets for those are
+ // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
+  // since we can tail call external functions with PC-Rel (i.e. we don't need
+  // to worry about different TOC pointers). Some of the external functions
+  // will be MO_GlobalAddress while others, like memcpy for example, are going
+  // to be MO_ExternalSymbol.
unsigned RetOpcode = MBBI->getOpcode();
if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1747,8 +1923,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
} else if (RetOpcode == PPC::TCRETURNdi8) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ if (JumpTarget.isGlobal())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (JumpTarget.isSymbol())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addExternalSymbol(JumpTarget.getSymbolName());
+ else
+ llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1776,7 +1958,6 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
const bool isPPC64 = Subtarget.isPPC64();
- const bool IsDarwinABI = Subtarget.isDarwinABI();
MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1823,25 +2004,26 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
- // function uses CR 2, 3, or 4.
- if (!isPPC64 && !IsDarwinABI &&
- (SavedRegs.test(PPC::CR2) ||
- SavedRegs.test(PPC::CR3) ||
+ // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
+  // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
+  // object at the offset of the CR-save slot in the linkage area. The actual
+  // save and restore of the condition register will be created as part of
+  // prologue and epilogue insertion, but the FixedStack object is needed to
+  // keep the CalleeSavedInfo valid.
+ if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
- int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
+ const int64_t SpillOffset =
+ Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
+ int FrameIdx =
+ MFI.CreateFixedObject(SpillSize, SpillOffset,
+ /* IsImmutable */ true, /* IsAliased */ false);
FI->setCRSpillFrameIndex(FrameIdx);
}
}
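The SpillOffset ternary above picks where the 4-byte CR spill slot lives relative to the stack pointer on each ABI. A tiny standalone sketch of just that selection (the function name is mine):

  #include <cassert>
  #include <cstdint>

  // CR-save slot offset as chosen above: in the linkage area at +8 for 64-bit
  // targets, +4 for 32-bit AIX, and at -4 (just below SP) for 32-bit SVR4,
  // whose linkage area has no CR-save field.
  int64_t crSpillOffset(bool IsPPC64, bool IsAIX) {
    return IsPPC64 ? 8 : IsAIX ? 4 : -4;
  }

  int main() {
    assert(crSpillOffset(true, false) == 8);
    assert(crSpillOffset(false, true) == 4);
    assert(crSpillOffset(false, false) == -4);
  }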
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- addScavengingSpillSlot(MF, RS);
- return;
- }
-
// Get callee saved register information.
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -2014,11 +2196,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
std::min<unsigned>(TRI->getEncodingValue(MinGPR),
TRI->getEncodingValue(MinG8R));
- if (Subtarget.isPPC64()) {
- LowerBound -= (31 - MinReg + 1) * 8;
- } else {
- LowerBound -= (31 - MinReg + 1) * 4;
- }
+ const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
+ LowerBound -= (31 - MinReg + 1) * GPRegSize;
}
// For 32-bit only, the CR save area is below the general register
@@ -2026,19 +2205,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// to the stack pointer and hence does not need an adjustment here.
// Only CR2 (the first nonvolatile spilled) has an associated frame
// index so that we have a single uniform save area.
- if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
// Adjust the frame index of the CR spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
- // Leave Darwin logic as-is.
- || (!Subtarget.isSVR4ABI() &&
- (PPC::CRBITRCRegClass.contains(Reg) ||
- PPC::CRRCRegClass.contains(Reg)))) {
- int FI = CSI[i].getFrameIdx();
-
+ for (const auto &CSInfo : CSI) {
+ if (CSInfo.getReg() == PPC::CR2) {
+ int FI = CSInfo.getFrameIdx();
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
+ break;
}
}
@@ -2108,17 +2281,17 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
unsigned Size = TRI.getSpillSize(RC);
- unsigned Align = TRI.getSpillAlignment(RC);
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
+ Align Alignment = TRI.getSpillAlign(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
// Might we have over-aligned allocas?
- bool HasAlVars = MFI.hasVarSizedObjects() &&
- MFI.getMaxAlignment() > getStackAlignment();
+ bool HasAlVars =
+ MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
-
+ RS->addScavengingFrameIndex(
+ MFI.CreateStackObject(Size, Alignment, false));
}
}
@@ -2179,17 +2352,9 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
return AllSpilledToReg;
}
-
-bool
-PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+bool PPCFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -2201,10 +2366,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
// CR2 through CR4 are the nonvolatile CR fields.
@@ -2232,7 +2395,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
- if (Subtarget.isPPC64()) {
+ if (!Subtarget.is32BitELFABI()) {
// The actual spill will happen at the start of the prologue.
FuncInfo->addMustSaveCR(Reg);
} else {
@@ -2260,37 +2423,37 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Use !IsLiveIn for the kill flag.
// We do not want to kill registers that are live in this function
// before their use because they will become undefined registers.
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
+ CSI[i].getFrameIdx(), RC, TRI);
+ else
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ RC, TRI);
}
}
}
return true;
}
-static void
-restoreCRs(bool isPPC64, bool is31,
- bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
+ bool CR4Spilled, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
DebugLoc DL;
- unsigned RestoreOp, MoveReg;
+ unsigned MoveReg = PPC::R12;
- if (isPPC64)
- // This is handled during epilogue generation.
- return;
- else {
- // 32-bit: FP-relative
- MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
- PPC::R12),
- CSI[CSIIndex].getFrameIdx()));
- RestoreOp = PPC::MTOCRF;
- MoveReg = PPC::R12;
- }
+ // 32-bit: FP-relative
+ MBB.insert(MI,
+ addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
+ CSI[CSIIndex].getFrameIdx()));
+ unsigned RestoreOp = PPC::MTOCRF;
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
@@ -2343,17 +2506,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return MBB.erase(I);
}
-bool
-PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- // Currently, this function only handles SVR4 32- and 64-bit ABIs.
- // Return false otherwise to maintain pre-existing behavior.
- if (!Subtarget.isSVR4ABI())
- return false;
+static bool isCalleeSavedCR(unsigned Reg) {
+ return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
+}
+bool PPCFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
@@ -2374,15 +2533,18 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // Only Darwin actually uses the VRSAVE register, but it can still appear
- // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
- // Darwin, ignore it.
- if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
+ if (Reg == PPC::VRSAVE)
continue;
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
+  // Restoring the callee-saved condition register fields is handled during
+  // epilogue insertion.
+ if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
@@ -2396,14 +2558,12 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
CR4Spilled = true;
continue;
} else {
- // When we first encounter a non-CR register after seeing at
+  // On 32-bit ELF, when we first encounter a non-CR register after seeing at
// least one CR register, restore all spilled CRs together.
- if ((CR2Spilled || CR3Spilled || CR4Spilled)
- && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31,
- CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
+ CSIIndex);
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
@@ -2415,7 +2575,16 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
} else {
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
+ // Functions without NoUnwind need to preserve the order of elements in
+ // saved vector registers.
+ if (Subtarget.needsSwapsForVSXMemOps() &&
+ !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
+ TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
+ TRI);
+ else
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+
assert(I != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
}
@@ -2432,9 +2601,10 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// If we haven't yet spilled the CRs, do so now.
if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ assert(Subtarget.is32BitELFABI() &&
+ "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
bool is31 = needsFP(*MF);
- restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
}
return true;
@@ -2445,14 +2615,10 @@ unsigned PPCFrameLowering::getTOCSaveOffset() const {
}
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("FramePointer is not implemented on AIX yet.");
return FramePointerSaveOffset;
}
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
- if (Subtarget.isAIXABI())
- report_fatal_error("BasePointer is not implemented on AIX yet.");
return BasePointerSaveOffset;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index a5fbc9acbb28c..8bf52c0ed01ab 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -61,8 +61,8 @@ class PPCFrameLowering: public TargetFrameLowering {
bool findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired = false,
- unsigned *SR1 = nullptr,
- unsigned *SR2 = nullptr) const;
+ Register *SR1 = nullptr,
+ Register *SR2 = nullptr) const;
bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const;
/**
@@ -100,6 +100,8 @@ public:
/// the function.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool needsFP(const MachineFunction &MF) const;
@@ -113,7 +115,7 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
/// This function will assign callee saved gprs to volatile vector registers
/// for prologue spills when applicable. It returns false if there are any
@@ -127,10 +129,11 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
@@ -153,10 +156,6 @@ public:
/// base pointer.
unsigned getBasePointerSaveOffset() const;
- /// getCRSaveOffset - Return the previous frame offset to save the
- /// CR register.
- unsigned getCRSaveOffset() const { return CRSaveOffset; }
-
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
unsigned getLinkageSize() const { return LinkageSize; }
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 776ec52e26048..8ffd89ef5ccd2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -139,6 +139,7 @@ namespace {
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
const PPCSubtarget *PPCSubTarget = nullptr;
+ const PPCSubtarget *Subtarget = nullptr;
const PPCTargetLowering *PPCLowering = nullptr;
unsigned GlobalBaseReg = 0;
@@ -150,10 +151,11 @@ namespace {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
- PPCLowering = PPCSubTarget->getTargetLowering();
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = Subtarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!PPCSubTarget->isSVR4ABI())
+ if (!Subtarget->isSVR4ABI())
InsertVRSaveCode(MF);
return true;
@@ -204,7 +206,6 @@ namespace {
bool tryBitfieldInsert(SDNode *N);
bool tryBitPermutation(SDNode *N);
bool tryIntCompareInGPR(SDNode *N);
- bool tryAndWithMask(SDNode *N);
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
// an X-Form load instruction with the offset being a relocation coming from
@@ -239,7 +240,7 @@ namespace {
/// bit signed displacement.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
}
/// SelectAddrIdx4 - Given the specified address, check to see if it can be
@@ -249,7 +250,8 @@ namespace {
/// displacement must be a multiple of 4.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
+ Align(4));
}
/// SelectAddrIdx16 - Given the specified address, check to see if it can be
@@ -259,7 +261,8 @@ namespace {
/// displacement must be a multiple of 16.
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
+ Align(16));
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
@@ -267,28 +270,29 @@ namespace {
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
-
+
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
    /// The last parameter \p 0 means the D form has no requirement for the
    /// 16-bit signed displacement.
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
}
/// SelectAddrImmX16 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of
/// 16(last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
+ Align(16));
}
// Select an address into a single register.
@@ -297,6 +301,10 @@ namespace {
return true;
}
+ bool SelectAddrPCRel(SDValue N, SDValue &Base) {
+ return PPCLowering->SelectAddressPCRel(N, Base);
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -317,7 +325,7 @@ namespace {
case InlineAsm::Constraint_Zy:
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
SDLoc dl(Op);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
@@ -343,6 +351,13 @@ namespace {
private:
bool trySETCC(SDNode *N);
+ bool tryAsSingleRLDICL(SDNode *N);
+ bool tryAsSingleRLDICR(SDNode *N);
+ bool tryAsSingleRLWINM(SDNode *N);
+ bool tryAsSingleRLWINM8(SDNode *N);
+ bool tryAsSingleRLWIMI(SDNode *N);
+ bool tryAsPairOfRLDICL(SDNode *N);
+ bool tryAsSingleRLDIMI(SDNode *N);
void PeepholePPC64();
void PeepholePPC64ZExt();
@@ -394,7 +409,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -429,7 +444,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -437,9 +452,9 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
DebugLoc dl;
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
- if (PPCSubTarget->isTargetELF()) {
+ if (Subtarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- if (!PPCSubTarget->isSecurePlt() &&
+ if (!Subtarget->isSecurePlt() &&
M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
@@ -3788,7 +3803,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
Opc = PPC::CMPD;
}
} else if (LHS.getValueType() == MVT::f32) {
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
switch (CC) {
default:
case ISD::SETEQ:
@@ -3815,7 +3830,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
} else
Opc = PPC::FCMPUS;
} else if (LHS.getValueType() == MVT::f64) {
- if (PPCSubTarget->hasSPE()) {
+ if (Subtarget->hasSPE()) {
switch (CC) {
default:
case ISD::SETEQ:
@@ -3840,10 +3855,10 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
} else
- Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
+ Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
} else {
assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
- assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
+ assert(Subtarget->hasVSX() && "__float128 requires VSX");
Opc = PPC::XSCMPUQP;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
@@ -3872,10 +3887,10 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
case ISD::SETULE:
case ISD::SETLE:
- return UseSPE ? PPC::PRED_LE : PPC::PRED_LE;
+ return PPC::PRED_LE;
case ISD::SETOGT:
case ISD::SETGT:
- return UseSPE ? PPC::PRED_GT : PPC::PRED_GT;
+ return PPC::PRED_GT;
case ISD::SETUGE:
case ISD::SETGE:
return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
@@ -4038,8 +4053,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
- if (!PPCSubTarget->useCRBits() &&
- isInt32Immediate(N->getOperand(1), Imm)) {
+ if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
@@ -4128,20 +4142,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
- if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
+ if (Subtarget->hasQPX() || Subtarget->hasSPE())
return false;
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
- unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
- PPCSubTarget->hasVSX(), Swap, Negate);
+ unsigned int VCmpInst =
+ getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
if (Swap)
std::swap(LHS, RHS);
EVT ResVT = VecVT.changeVectorElementTypeToInteger();
if (Negate) {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
- CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
+ CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
ResVT, VCmp, VCmp);
return true;
}
@@ -4150,7 +4164,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return true;
}
- if (PPCSubTarget->useCRBits())
+ if (Subtarget->useCRBits())
return false;
bool Inv;
@@ -4160,7 +4174,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
// The correct compare instruction is already set by SelectCC()
- if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
+ if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
Idx = 1;
}
@@ -4209,7 +4223,7 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
// because it is translated to r31 or r1 + slot + offset. We won't know the
// slot number until the stack frame is finalized.
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
- unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
+ unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
if ((SlotAlign % Val) != 0)
return false;
@@ -4241,13 +4255,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
SDValue TrueRes = N->getOperand(2);
SDValue FalseRes = N->getOperand(3);
ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
- if (!TrueConst)
+ if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
+ N->getSimpleValueType(0) != MVT::i32))
return false;
- assert((N->getSimpleValueType(0) == MVT::i64 ||
- N->getSimpleValueType(0) == MVT::i32) &&
- "Expecting either i64 or i32 here.");
-
// We are looking for any of:
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
@@ -4371,142 +4382,251 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
return true;
}
-bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
- if (N->getOpcode() != ISD::AND)
+bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ unsigned Imm;
+ if (!isInt32Immediate(N->getOperand(1), Imm))
return false;
SDLoc dl(N);
SDValue Val = N->getOperand(0);
- unsigned Imm, Imm2, SH, MB, ME;
- uint64_t Imm64;
-
+ unsigned SH, MB, ME;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
- SDValue Val = N->getOperand(0).getOperand(0);
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
+ if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
+ Val = Val.getOperand(0);
+ SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl)};
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
// If this is just a masked value where the input is not handled, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
- if (isInt32Immediate(N->getOperand(1), Imm)) {
- if (isRunOfOnes(Imm, MB, ME) &&
- N->getOperand(0).getOpcode() != ISD::ROTL) {
- SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
- return true;
- }
- // AND X, 0 -> 0, not "rlwinm 32".
- if (Imm == 0) {
- ReplaceUses(SDValue(N, 0), N->getOperand(1));
- return true;
- }
+ if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
+ SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
+ getI32Imm(ME, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ return true;
+ }
- // ISD::OR doesn't get all the bitfield insertion fun.
- // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
- // bitfield insert.
- if (N->getOperand(0).getOpcode() == ISD::OR &&
- isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
- // The idea here is to check whether this is equivalent to:
- // (c1 & m) | (x & ~m)
- // where m is a run-of-ones mask. The logic here is that, for each bit in
- // c1 and c2:
- // - if both are 1, then the output will be 1.
- // - if both are 0, then the output will be 0.
- // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
- // come from x.
- // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
- // be 0.
- // If that last condition is never the case, then we can form m from the
- // bits that are the same between c1 and c2.
- unsigned MB, ME;
- if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
- SDValue Ops[] = { N->getOperand(0).getOperand(0),
- N->getOperand(0).getOperand(1),
- getI32Imm(0, dl), getI32Imm(MB, dl),
- getI32Imm(ME, dl) };
- ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
- return true;
- }
- }
- } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) {
- // If this is a 64-bit zero-extension mask, emit rldicl.
- if (isMask_64(Imm64)) {
- MB = 64 - countTrailingOnes(Imm64);
- SH = 0;
-
- if (Val.getOpcode() == ISD::ANY_EXTEND) {
- auto Op0 = Val.getOperand(0);
- if ( Op0.getOpcode() == ISD::SRL &&
- isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
-
- auto ResultType = Val.getNode()->getValueType(0);
- auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
- ResultType);
- SDValue IDVal (ImDef, 0);
-
- Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
- ResultType, IDVal, Op0.getOperand(0),
- getI32Imm(1, dl)), 0);
- SH = 64 - Imm;
- }
- }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (Imm == 0) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return true;
+ }
- // If the operand is a logical right shift, we can fold it into this
- // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
- // for n <= mb. The right shift is really a left rotate followed by a
- // mask, and this mask is a more-restrictive sub-mask of the mask implied
- // by the shift.
- if (Val.getOpcode() == ISD::SRL &&
- isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
- assert(Imm < 64 && "Illegal shift amount");
- Val = Val.getOperand(0);
- SH = 64 - Imm;
- }
+ return false;
+}
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
- return true;
- } else if (isMask_64(~Imm64)) {
- // If this is a negated 64-bit zero-extension mask,
- // i.e. the immediate is a sequence of ones from most significant side
- // and all zero for reminder, we should use rldicr.
- MB = 63 - countTrailingOnes(~Imm64);
- SH = 0;
- SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
- return true;
- }
+bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+ return false;
- // It is not 16-bit imm that means we need two instructions at least if
- // using "and" instruction. Try to exploit it with rotate mask instructions.
- if (isRunOfOnes64(Imm64, MB, ME)) {
- if (MB >= 32 && MB <= ME) {
- // MB ME
- // +----------------------+
- // |xxxxxxxxxxx00011111000|
- // +----------------------+
- // 0 32 64
- // We can only do it if the MB is larger than 32 and MB <= ME
- // as RLWINM will replace the content of [0 - 32) with [32 - 64) even
- // we didn't rotate it.
- SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
- getI64Imm(ME - 32, dl) };
- CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
- return true;
- }
- // TODO - handle it with rldicl + rldicl
- }
+ unsigned MB, ME;
+ if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
+ // MB ME
+ // +----------------------+
+ // |xxxxxxxxxxx00011111000|
+ // +----------------------+
+ // 0 32 64
+  // We can only do this if MB >= 32 and MB <= ME, as RLWINM will replace the
+  // contents of [0 - 32) with [32 - 64) even though we didn't rotate the
+  // value.
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
+ getI64Imm(ME - 32, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
+ return false;
+
+  // Do nothing if it is a 16-bit imm, as the patterns in the .td file handle
+  // it well with "andi.".
+ if (isUInt<16>(Imm64))
+ return false;
+
+ SDLoc Loc(N);
+ SDValue Val = N->getOperand(0);
+
+  // Optimize with two rldicl's as follows:
+  // Add the missing bits on the left of the mask and check that the result
+  // is a wrapped run of ones, i.e.
+  // change pattern |0001111100000011111111|
+  // to             |1111111100000011111111|.
+ unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
+ if (NumOfLeadingZeros != 0)
+ Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
+
+ unsigned MB, ME;
+ if (!isRunOfOnes64(Imm64, MB, ME))
+ return false;
+
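+  // For example (illustrative): Imm64 = 0x0FFF00000000FFFF becomes
+  // 0xFFFF00000000FFFF after the padding above, a wrapped run of ones with
+  // MB = 48 and ME = 15; the code below then emits rldicl(x, 16, 32)
+  // followed by rldicl(tmp, 48, 4) on that result.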
+ // ME MB MB-ME+63
+ // +----------------------+ +----------------------+
+ // |1111111100000011111111| -> |0000001111111111111111|
+ // +----------------------+ +----------------------+
+ // 0 63 0 63
+ // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
+ unsigned OnesOnLeft = ME + 1;
+ unsigned ZerosInBetween = (MB - ME + 63) & 63;
+  // Rotate left by OnesOnLeft (so the leading ones become trailing ones) and
+  // clear the leading ZerosInBetween bits, which are zeros in the mask.
+ Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
+ getI64Imm(OnesOnLeft, Loc),
+ getI64Imm(ZerosInBetween, Loc)),
+ 0);
+ // MB-ME+63 ME MB
+ // +----------------------+ +----------------------+
+ // |0000001111111111111111| -> |0001111100000011111111|
+ // +----------------------+ +----------------------+
+ // 0 63 0 63
+  // Rotate back by 64 - OnesOnLeft to undo the previous rotate. Then clear
+  // the NumOfLeadingZeros leftmost bits, i.e. the ones we added above.
+ SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
+ getI64Imm(NumOfLeadingZeros, Loc)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+}
+
+bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ unsigned Imm;
+ if (!isInt32Immediate(N->getOperand(1), Imm))
+ return false;
+
+ SDValue Val = N->getOperand(0);
+ unsigned Imm2;
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
+ // bitfield insert.
+ if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
+ return false;
+
+ // The idea here is to check whether this is equivalent to:
+ // (c1 & m) | (x & ~m)
+ // where m is a run-of-ones mask. The logic here is that, for each bit in
+ // c1 and c2:
+ // - if both are 1, then the output will be 1.
+ // - if both are 0, then the output will be 0.
+ // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
+ // come from x.
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
+ // be 0.
+ // If that last condition is never the case, then we can form m from the
+ // bits that are the same between c1 and c2.
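+  // For example (illustrative): for (x | 0x00FF0000) & 0xFFFF0000 we have
+  // c1 = 0x00FF0000, c2 = 0xFFFF0000 and m = ~(c1 ^ c2) = 0x00FFFFFF
+  // (MB = 8, ME = 31); no bit has c1 = 1 and c2 = 0, so we can emit
+  // rlwimi(x, c1, 0, 8, 31), i.e. (c1 & m) | (x & ~m).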
+ unsigned MB, ME;
+ if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
+ SDLoc dl(N);
+ SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
+ getI32Imm(MB, dl), getI32Imm(ME, dl)};
+ ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
+ return true;
}
return false;
}
+bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
+ return false;
+
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ unsigned MB = 64 - countTrailingOnes(Imm64);
+ unsigned SH = 0;
+ unsigned Imm;
+ SDValue Val = N->getOperand(0);
+ SDLoc dl(N);
+
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+ if (Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
+ SDValue IDVal(ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
+ IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)),
+ 0);
+ SH = 64 - Imm;
+ }
+ }
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
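+  // For example (illustrative): for (x >> 8) & 0xFFFFFF we have MB = 40 and
+  // a shift amount of 8 <= MB, so a single rldicl(x, 56, 40) covers both the
+  // shift and the mask.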
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ return true;
+}
+
+bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
+ uint64_t Imm64;
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
+ !isMask_64(~Imm64))
+ return false;
+
+  // If this is a negated 64-bit zero-extension mask,
+  // i.e. the immediate is a sequence of ones from the most significant side
+  // and zeros for the remainder, we should use rldicr.
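+  // For example (illustrative): Imm64 = 0xFFFFFFFF00000000 gives MB = 31, and
+  // rldicr(x, 0, 31) keeps bits 0-31 (the 32 most significant bits) while
+  // clearing the rest.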
+ unsigned MB = 63 - countTrailingOnes(~Imm64);
+ unsigned SH = 0;
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return true;
+}
+
+bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
+ uint64_t Imm64;
+ unsigned MB, ME;
+ SDValue N0 = N->getOperand(0);
+
+  // We won't get fewer instructions if the imm is a 32-bit integer.
+  // rldimi requires the imm to be a run of consecutive ones with zeros on
+  // both sides. Also, make sure the first operand has only one use; otherwise
+  // this may increase register pressure since rldimi is destructive.
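+  // For example (illustrative): x | 0x0000FFFF00000000 is a run of ones with
+  // MB = 16 and ME = 31, so SH = 32 and we emit rldimi(x, -1, 32, 16), which
+  // sets bits 16-31 (numbered from the most significant bit) to ones.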
+ if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
+ isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
+ return false;
+
+ unsigned SH = 63 - ME;
+ SDLoc Dl(N);
+  // Use selectI64Imm to materialize the -1 source with a single LI
+  // instruction instead of materializing Imm64 itself.
+ SDValue Ops[] = {
+ N->getOperand(0),
+ SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
+ getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
+ return true;
+}
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4551,7 +4671,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::ADDI_TLSGD_L_ADDR: {
const Module *Mod = MF->getFunction().getParent();
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
+ !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
Mod->getPICLevel() == PICLevel::SmallPIC)
break;
// Attach global base pointer on GETtlsADDR32 node in order to
@@ -4560,8 +4680,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
} break;
case PPCISD::CALL: {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
- !PPCSubTarget->isTargetELF())
+ !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
+ !Subtarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
@@ -4625,7 +4745,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE: {
// Change TLS initial-exec D-form stores to X-form stores.
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
+ if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
@@ -4639,7 +4759,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC) {
// Change TLS initial-exec D-form loads to X-form loads.
- if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
+ if (EnableTLSOpt && Subtarget->isELFv2ABI())
if (tryTLSXFormLoad(LD))
return;
break;
@@ -4730,7 +4850,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
- if (tryAndWithMask(N))
+ if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
+ tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.
@@ -4753,10 +4874,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
}
+  // If this is an 'or' against an imm that is a run of consecutive ones with
+  // zeros on both sides, try to emit rldimi.
+ if (tryAsSingleRLDIMI(N))
+ return;
+
// OR with a 32-bit immediate can be handled by ori + oris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
@@ -4779,7 +4905,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// XOR with a 32-bit immediate can be handled by xori + xoris
// without creating an immediate in a GPR.
uint64_t Imm64 = 0;
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
@@ -4866,11 +4992,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
- if (PPCSubTarget->useCRBits() &&
- N->getOperand(0).getValueType() == MVT::i1)
+ if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
break;
- if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
+ if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
bool NeedSwapOps = false;
bool IsUnCmp = false;
if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
@@ -4945,7 +5070,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
unsigned BROpc =
- getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);
+ getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
unsigned SelectCCOp;
if (N->getValueType(0) == MVT::i32)
@@ -4953,28 +5078,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::i64)
SelectCCOp = PPC::SELECT_CC_I8;
else if (N->getValueType(0) == MVT::f32) {
- if (PPCSubTarget->hasP8Vector())
+ if (Subtarget->hasP8Vector())
SelectCCOp = PPC::SELECT_CC_VSSRC;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE4;
else
SelectCCOp = PPC::SELECT_CC_F4;
} else if (N->getValueType(0) == MVT::f64) {
- if (PPCSubTarget->hasVSX())
+ if (Subtarget->hasVSX())
SelectCCOp = PPC::SELECT_CC_VSFRC;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
else
SelectCCOp = PPC::SELECT_CC_F8;
} else if (N->getValueType(0) == MVT::f128)
SelectCCOp = PPC::SELECT_CC_F16;
- else if (PPCSubTarget->hasSPE())
+ else if (Subtarget->hasSPE())
SelectCCOp = PPC::SELECT_CC_SPE;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
SelectCCOp = PPC::SELECT_CC_QFRC;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
SelectCCOp = PPC::SELECT_CC_QSRC;
- else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
@@ -4988,8 +5113,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::VECTOR_SHUFFLE:
- if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
- N->getValueType(0) == MVT::v2i64)) {
+ if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
@@ -5024,7 +5149,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// For little endian, we must swap the input operands and adjust
// the mask elements (reverse and invert them).
- if (PPCSubTarget->isLittleEndian()) {
+ if (Subtarget->isLittleEndian()) {
std::swap(Op1, Op2);
unsigned tmp = DM[0];
DM[0] = 1 - DM[1];
@@ -5041,7 +5166,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
break;
case PPCISD::BDNZ:
case PPCISD::BDZ: {
- bool IsPPC64 = PPCSubTarget->isPPC64();
+ bool IsPPC64 = Subtarget->isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
@@ -5069,7 +5194,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
unsigned PCC =
- getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);
+ getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
if (N->getOperand(2).getValueType() == MVT::i1) {
unsigned Opc;
@@ -5122,11 +5247,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
case PPCISD::TOC_ENTRY: {
- const bool isPPC64 = PPCSubTarget->isPPC64();
- const bool isELFABI = PPCSubTarget->isSVR4ABI();
- const bool isAIXABI = PPCSubTarget->isAIXABI();
-
- assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct");
+ const bool isPPC64 = Subtarget->isPPC64();
+ const bool isELFABI = Subtarget->isSVR4ABI();
+ const bool isAIXABI = Subtarget->isAIXABI();
    // PowerPC only supports the small, medium, and large code models.
const CodeModel::Model CModel = TM.getCodeModel();
@@ -5177,7 +5300,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
// generate two instructions as described below. The first source operand
// is a symbol reference. If it must be toc-referenced according to
- // PPCSubTarget, we generate:
+ // Subtarget, we generate:
// [32-bit AIX]
// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
// [64-bit ELF/AIX]
@@ -5209,7 +5332,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
- assert(PPCSubTarget->is32BitELFABI() &&
+ assert(Subtarget->is32BitELFABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
@@ -5306,7 +5429,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
"Only OR nodes are supported for CMPB");
SDValue Res;
- if (!PPCSubTarget->hasCMPB())
+ if (!Subtarget->hasCMPB())
return Res;
if (N->getValueType(0) != MVT::i32 &&
@@ -5517,7 +5640,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// only one instruction (like a zero or one), then we should fold in those
// operations with the select.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
- if (!PPCSubTarget->useCRBits())
+ if (!Subtarget->useCRBits())
return;
if (N->getOpcode() != ISD::ZERO_EXTEND &&
@@ -5549,8 +5672,7 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
- User->getValueType(0),
- O0.getNode(), O1.getNode());
+ User->getValueType(0), {O0, O1});
};
// FIXME: When the semantics of the interaction between select and undef
@@ -6259,7 +6381,7 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
}
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
- if (!PPCSubTarget->isPPC64())
+ if (!Subtarget->isPPC64())
return;
// When we zero-extend from i32 to i64, we use a pattern like this:
@@ -6428,10 +6550,6 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
}
void PPCDAGToDAGISel::PeepholePPC64() {
- // These optimizations are currently supported only for 64-bit SVR4.
- if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
- return;
-
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
while (Position != CurDAG->allnodes_begin()) {
@@ -6544,7 +6662,8 @@ void PPCDAGToDAGISel::PeepholePPC64() {
int MaxDisplacement = 7;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
- MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+ MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
}
bool UpdateHBase = false;
@@ -6610,10 +6729,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
// We can't perform this optimization for data whose alignment
// is insufficient for the instruction encoding.
- if (GV->getAlignment() < 4 &&
- (RequiresMod4Offset || (Offset % 4) != 0)) {
+ if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
@@ -6621,8 +6740,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
- ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
- CP->getAlignment(),
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
Offset, Flags);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 60ed72e1018bd..ddfbd04e1ebc5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -55,7 +55,6 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -118,14 +117,13 @@ cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
-static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
-cl::desc("enable quad precision float support on ppc"), cl::Hidden);
-
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
+STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
+STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
@@ -167,6 +165,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ if (Subtarget.isISA3_0()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f64, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f32, MVT::f16, Legal);
+ } else {
+    // No extending loads from f16, and no HW conversions back and forth.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ }
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
@@ -243,15 +258,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// PowerPC has no SREM/UREM instructions unless we are on P9
// On P9 we may use a hardware instruction to compute the remainder.
- // The instructions are not legalized directly because in the cases where the
- // result of both the remainder and the division is required it is more
- // efficient to compute the remainder from the result of the division rather
- // than use the remainder instruction.
+  // When the result of both the remainder and the division is required, it is
+  // more efficient to compute the remainder from the result of the division
+  // rather than use the remainder instruction. The instructions are marked
+  // Legal here because the DivRemPairs pass performs the transformation at
+  // the IR level.
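+  // (For example, given both a / b and a % b, the DivRemPairs pass rewrites
+  // the remainder as a - (a / b) * b so that the division result is reused.)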
if (Subtarget.isISA3_0()) {
- setOperationAction(ISD::SREM, MVT::i32, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Custom);
- setOperationAction(ISD::SREM, MVT::i64, Custom);
- setOperationAction(ISD::UREM, MVT::i64, Custom);
+ setOperationAction(ISD::SREM, MVT::i32, Legal);
+ setOperationAction(ISD::UREM, MVT::i32, Legal);
+ setOperationAction(ISD::SREM, MVT::i64, Legal);
+ setOperationAction(ISD::UREM, MVT::i64, Legal);
} else {
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -269,6 +285,40 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+  // Handle constrained floating-point operations of scalars.
+  // TODO: Handle SPE-specific operations.
+ setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
+ if (Subtarget.hasVSX())
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+
+ if (Subtarget.hasFSQRT()) {
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ }
+
+ if (Subtarget.hasFPRND()) {
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
+
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
+ }
+
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -373,6 +423,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasSPE()) {
// SPE has built-in conversions
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
@@ -522,9 +575,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
- if (Subtarget.hasSPE())
+ if (Subtarget.hasSPE()) {
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- else
+ } else
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
@@ -567,6 +621,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasAltivec()) {
+ for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ }
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -677,6 +737,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -720,6 +781,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.hasP8Altivec())
setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+    // Custom-lower ROTL of v1i128 to a VECTOR_SHUFFLE of v16i8.
+ setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
if (Subtarget.hasAltivec())
for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
@@ -746,7 +809,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
- setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
@@ -793,12 +856,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
@@ -888,6 +955,37 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+      // Handle constrained floating-point operations of vectors.
+      // The predicate is `hasVSX` because Altivec instructions do not raise
+      // floating-point exceptions, while VSX vector instructions do.
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
+
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
@@ -907,44 +1005,59 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
- if (EnableQuadPrecision) {
- addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
- setOperationAction(ISD::FADD, MVT::f128, Legal);
- setOperationAction(ISD::FSUB, MVT::f128, Legal);
- setOperationAction(ISD::FDIV, MVT::f128, Legal);
- setOperationAction(ISD::FMUL, MVT::f128, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
- // No extending loads to f128 on PPC.
- for (MVT FPT : MVT::fp_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
- setOperationAction(ISD::FMA, MVT::f128, Legal);
- setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
- setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
- setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
- setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
-
- setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
- setOperationAction(ISD::FRINT, MVT::f128, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
- setOperationAction(ISD::FCEIL, MVT::f128, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
- setOperationAction(ISD::FROUND, MVT::f128, Legal);
-
- setOperationAction(ISD::SELECT, MVT::f128, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
- setTruncStoreAction(MVT::f128, MVT::f64, Expand);
- setTruncStoreAction(MVT::f128, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i128, Custom);
- // No implementation for these ops for PowerPC.
- setOperationAction(ISD::FSIN , MVT::f128, Expand);
- setOperationAction(ISD::FCOS , MVT::f128, Expand);
- setOperationAction(ISD::FPOW, MVT::f128, Expand);
- setOperationAction(ISD::FPOWI, MVT::f128, Expand);
- setOperationAction(ISD::FREM, MVT::f128, Expand);
- }
+ addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+ setOperationAction(ISD::FADD, MVT::f128, Legal);
+ setOperationAction(ISD::FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+ // No extending loads to f128 on PPC.
+ for (MVT FPT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+ setOperationAction(ISD::FMA, MVT::f128, Legal);
+ setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
+
+ setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
+ setOperationAction(ISD::FRINT, MVT::f128, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f128, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
+ setOperationAction(ISD::FROUND, MVT::f128, Legal);
+
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ // No implementation for these ops for PowerPC.
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+
+ // Handle constrained floating-point operations of fp128
+ setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
@@ -1117,6 +1230,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
}
+
+ // TODO: Handle constrained floating-point operations of v4f64
}
if (Subtarget.has64BitSupport())
@@ -1151,6 +1266,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -1190,34 +1306,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::VSELECT);
}
- // Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget.isDarwin()) {
- setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
- setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
- setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
- setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
- setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
- setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
- setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
- setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
- setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
- setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
- }
-
- if (EnableQuadPrecision) {
- setLibcallName(RTLIB::LOG_F128, "logf128");
- setLibcallName(RTLIB::LOG2_F128, "log2f128");
- setLibcallName(RTLIB::LOG10_F128, "log10f128");
- setLibcallName(RTLIB::EXP_F128, "expf128");
- setLibcallName(RTLIB::EXP2_F128, "exp2f128");
- setLibcallName(RTLIB::SIN_F128, "sinf128");
- setLibcallName(RTLIB::COS_F128, "cosf128");
- setLibcallName(RTLIB::POW_F128, "powf128");
- setLibcallName(RTLIB::FMIN_F128, "fminf128");
- setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
- setLibcallName(RTLIB::POWI_F128, "__powikf2");
- setLibcallName(RTLIB::REM_F128, "fmodf128");
- }
+ setLibcallName(RTLIB::LOG_F128, "logf128");
+ setLibcallName(RTLIB::LOG2_F128, "log2f128");
+ setLibcallName(RTLIB::LOG10_F128, "log10f128");
+ setLibcallName(RTLIB::EXP_F128, "expf128");
+ setLibcallName(RTLIB::EXP2_F128, "exp2f128");
+ setLibcallName(RTLIB::SIN_F128, "sinf128");
+ setLibcallName(RTLIB::COS_F128, "cosf128");
+ setLibcallName(RTLIB::POW_F128, "powf128");
+ setLibcallName(RTLIB::FMIN_F128, "fminf128");
+ setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
+ setLibcallName(RTLIB::REM_F128, "fmodf128");
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
@@ -1227,8 +1327,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
setMinFunctionAlignment(Align(4));
- if (Subtarget.isDarwin())
- setPrefFunctionAlignment(Align(16));
switch (Subtarget.getCPUDirective()) {
default: break;
@@ -1245,6 +1343,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
@@ -1280,27 +1379,33 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxLoadsPerMemcmp = 8;
MaxLoadsPerMemcmpOptSize = 4;
}
+
+ // Let the subtarget (CPU) decide if a predictable select is more expensive
+ // than the corresponding branch. This information is used in CGP to decide
+ // when to convert selects into branches.
+ PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
-static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
- unsigned MaxMaxAlign) {
+static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
- MaxAlign = 32;
- else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
- MaxAlign = 16;
+ if (MaxMaxAlign >= 32 &&
+ VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
+ MaxAlign = Align(32);
+ else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
+ MaxAlign < 16)
+ MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
- unsigned EltAlign = 0;
+ Align EltAlign;
getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
@@ -1314,16 +1419,12 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
- // Darwin passes everything on 4 byte boundary.
- if (Subtarget.isDarwin())
- return 4;
-
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
if (Subtarget.hasAltivec() || Subtarget.hasQPX())
- getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
- return Align;
+ getMaxByValAlign(Ty, Alignment, Subtarget.hasQPX() ? Align(32) : Align(16));
+ return Alignment.value();
}
bool PPCTargetLowering::useSoftFloat() const {
@@ -1338,6 +1439,16 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
+/// type is cheaper than a multiply followed by a shift.
+/// This is true for words and doublewords on 64-bit PowerPC.
+bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
+ if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
+ isOperationLegal(ISD::MULHU, Type)))
+ return true;
+ return TargetLowering::isMulhCheaperThanMulShift(Type);
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1359,10 +1470,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
- case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
- case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
+ case PPCISD::XXSPLTI_SP_TO_DP:
+ return "PPCISD::XXSPLTI_SP_TO_DP";
+ case PPCISD::XXSPLTI32DX:
+ return "PPCISD::XXSPLTI32DX";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
@@ -1374,6 +1487,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
+ case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
@@ -1381,6 +1495,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@@ -1394,6 +1509,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
+ case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
+ return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
case PPCISD::ANDI_rec_1_EQ_BIT:
return "PPCISD::ANDI_rec_1_EQ_BIT";
case PPCISD::ANDI_rec_1_GT_BIT:
@@ -1407,7 +1524,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
- case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
@@ -1457,7 +1573,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
+ case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
+ case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
}
return nullptr;
}
@@ -2320,17 +2438,22 @@ bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
-bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
- SDValue &Index, SelectionDAG &DAG,
- unsigned EncodingAlignment) const {
- int16_t imm = 0;
+bool PPCTargetLowering::SelectAddressRegReg(
+ SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
+ MaybeAlign EncodingAlignment) const {
+  // If we have a PC-Relative target flag, don't select as [reg+reg]; it will
+  // be a [pc+imm].
+ if (SelectAddressPCRel(N, Base))
+ return false;
+
+ int16_t Imm = 0;
if (N.getOpcode() == ISD::ADD) {
// Is there any SPE load/store (f64), which can't handle 16bit offset?
// SPE load/store can only handle 8-bit offsets.
if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
return true;
- if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || !(imm % EncodingAlignment)))
+ if (isIntS16Immediate(N.getOperand(1), Imm) &&
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
@@ -2339,8 +2462,8 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
- if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || !(imm % EncodingAlignment)))
+ if (isIntS16Immediate(N.getOperand(1), Imm) &&
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i can fold it if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
@@ -2395,8 +2518,7 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
- if (Align >= 4)
+ if (MFI.getObjectAlign(FrameIdx) >= Align(4))
return;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2407,12 +2529,17 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
-bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
- SDValue &Base,
- SelectionDAG &DAG,
- unsigned EncodingAlignment) const {
+bool PPCTargetLowering::SelectAddressRegImm(
+ SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
+ MaybeAlign EncodingAlignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
+
+  // If we have a PC-Relative target flag, don't select as [reg+imm]; it will
+  // be a [pc+imm].
+ if (SelectAddressPCRel(N, Base))
+ return false;
+
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
return false;
@@ -2420,7 +2547,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2444,7 +2571,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -2471,7 +2598,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// this as "d, 0"
int16_t Imm;
if (isIntS16Immediate(CN, Imm) &&
- (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
+ (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -2481,7 +2608,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
+ (!EncodingAlignment ||
+ isAligned(*EncodingAlignment, CN->getZExtValue()))) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -2536,6 +2664,27 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
+template <typename Ty> static bool isValidPCRelNode(SDValue N) {
+ Ty *PCRelCand = dyn_cast<Ty>(N);
+ return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
+}
+
+/// Returns true if this address is a PC-Relative address.
+/// PC-Relative addresses are either marked with the flag PPCII::MO_PCREL_FLAG
+/// or have the node opcode PPCISD::MAT_PCREL_ADDR.
+bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
+  // A PPCISD::MAT_PCREL_ADDR node materializes a PC-Relative address; always
+  // select it as PC-Relative.
+ Base = N;
+ if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
+ return true;
+ if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
+ isValidPCRelNode<GlobalAddressSDNode>(N) ||
+ isValidPCRelNode<JumpTableSDNode>(N) ||
+ isValidPCRelNode<BlockAddressSDNode>(N))
+ return true;
+ return false;
+}
+
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
@@ -2573,7 +2722,8 @@ static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
UI != UE; ++UI)
if (UI.getUse().get().getResNo() == 0 &&
- UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
+ UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
return false;
return true;
@@ -2646,14 +2796,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
return false;
}
@@ -2687,18 +2837,6 @@ static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
}
-
- // If this is a reference to a global value that requires a non-lazy-ptr, make
- // sure that instruction lowering adds it.
- if (GV && Subtarget.hasLazyResolverStub(GV)) {
- HiOpFlags |= PPCII::MO_NLP_FLAG;
- LoOpFlags |= PPCII::MO_NLP_FLAG;
-
- if (GV->hasHiddenVisibility()) {
- HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
- LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
- }
- }
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
@@ -2740,7 +2878,7 @@ SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(
PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
MachineMemOperand::MOLoad);
}
@@ -2753,8 +2891,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(CP);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue ConstPool = DAG.getTargetConstantPool(
+ C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
+ return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
+ }
setUsesTOCBasePtr(DAG);
- SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
@@ -2763,15 +2908,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
if (IsPIC && Subtarget.isSVR4ABI()) {
- SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
- PPCII::MO_PIC_FLAG);
+ SDValue GA =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
SDValue CPIHi =
- DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
SDValue CPILo =
- DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
@@ -2828,6 +2973,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  // isUsingPCRelativeCalls() returns true when PC-Relative addressing is
+  // enabled.
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(JT);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue GA =
+ DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
+ SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ return MatAddr;
+ }
+
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
@@ -2857,6 +3012,16 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
+  // isUsingPCRelativeCalls() returns true when PC-Relative addressing is
+  // enabled.
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ SDLoc DL(BASDN);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
+ PPCII::MO_PCREL_FLAG);
+ SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ return MatAddr;
+ }
+
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
@@ -2986,6 +3151,22 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI & AIX ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ if (isAccessedAsGotIndirect(Op)) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
+ PPCII::MO_PCREL_FLAG |
+ PPCII::MO_GOT_FLAG);
+ SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
+ MachinePointerInfo());
+ return Load;
+ } else {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
+ PPCII::MO_PCREL_FLAG);
+ return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
+ }
+ }
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return getTOCEntry(DAG, DL, GA);
@@ -3007,13 +3188,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GALo =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
- SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);
-
- // If the global reference is actually to a non-lazy-pointer, we have to do an
- // extra load to get the address of the global.
- if (MOHiFlag & PPCII::MO_NLP_FLAG)
- Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
- return Ptr;
+ return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -3174,10 +3349,10 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
// We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2-byte alignment + 2*sizeof(char*) = 12 bytes
- return DAG.getMemcpy(Op.getOperand(0), Op,
- Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
- false, MachinePointerInfo(), MachinePointerInfo());
+ return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
+ false, true, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -3234,7 +3409,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -3340,31 +3515,31 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
-static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
- ISD::ArgFlagsTy Flags,
- unsigned PtrByteSize) {
- unsigned Align = PtrByteSize;
+static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ Align Alignment(PtrByteSize);
// Altivec parameters are padded to a 16 byte boundary.
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
- Align = 16;
+ Alignment = Align(16);
// QPX vector types stored in double-precision are padded to a 32 byte
// boundary.
else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
- Align = 32;
+ Alignment = Align(32);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
- unsigned BVAlign = Flags.getByValAlign();
+ auto BVAlign = Flags.getNonZeroByValAlign();
if (BVAlign > PtrByteSize) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
+ if (BVAlign.value() % PtrByteSize != 0)
+ llvm_unreachable(
"ByVal alignment is not a multiple of the pointer size");
- Align = BVAlign;
+ Alignment = BVAlign;
}
}
@@ -3374,12 +3549,12 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
// needs to be aligned to the size of the full type. (Except for
// ppcf128, which is only aligned as its f64 components.)
if (Flags.isSplit() && OrigVT != MVT::ppcf128)
- Align = OrigVT.getStoreSize();
+ Alignment = Align(OrigVT.getStoreSize());
else
- Align = ArgVT.getStoreSize();
+ Alignment = Align(ArgVT.getStoreSize());
}
- return Align;
+ return Alignment;
}
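The switch from a raw unsigned to llvm::Align above is part of a wider
migration in this patch: Align wraps a known power-of-two byte alignment and
makes an accidental zero unrepresentable. A minimal sketch of the pieces this
function now relies on (llvm/Support/Alignment.h, as used in the hunk):

    #include <cstdint>
    #include "llvm/Support/Alignment.h"

    // Round an argument offset up to the next slot boundary. The Align
    // constructor asserts that PtrByteSize is a non-zero power of two.
    static uint64_t nextSlotOffset(uint64_t ArgOffset, unsigned PtrByteSize) {
      llvm::Align Alignment(PtrByteSize);
      return llvm::alignTo(ArgOffset, Alignment);
    }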
/// CalculateStackSlotUsed - Return whether this argument will use its
@@ -3397,9 +3572,9 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
bool UseMemory = false;
// Respect alignment of argument on the stack.
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ Align Alignment =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
// If there's no space left in the argument save area, we must
// use memory (this check also catches zero-sized arguments).
if (ArgOffset >= LinkageSize + ParamAreaSize)
@@ -3443,10 +3618,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
- unsigned TargetAlign = Lowering->getStackAlignment();
- unsigned AlignMask = TargetAlign - 1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- return NumBytes;
+ return alignTo(NumBytes, Lowering->getStackAlign());
}
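Several hunks here replace hand-rolled rounding with alignTo. Both removed
forms round up to a multiple of the alignment; a standalone check of that
equivalence (the old idioms re-implemented for illustration; the mask form
additionally requires a power-of-two alignment):

    #include <cassert>
    #include <cstdint>

    // Divide-and-multiply rounding, as removed from CalculateStackSlotUsed.
    static uint64_t roundUpDiv(uint64_t Value, uint64_t Align) {
      return ((Value + Align - 1) / Align) * Align;
    }

    // Mask rounding, as removed from EnsureStackAlignment.
    static uint64_t roundUpMask(uint64_t Value, uint64_t Align) {
      uint64_t Mask = Align - 1;
      return (Value + Mask) & ~Mask;
    }

    int main() {
      for (uint64_t Align : {4u, 8u, 16u, 32u})
        for (uint64_t Value = 0; Value < 128; ++Value)
          assert(roundUpDiv(Value, Align) == roundUpMask(Value, Align));
      return 0;
    }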
SDValue PPCTargetLowering::LowerFormalArguments(
@@ -3509,7 +3681,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
- unsigned PtrByteSize = 4;
+ const Align PtrAlign(4);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -3518,7 +3690,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeFormalArguments(Ins);
@@ -3627,7 +3799,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
@@ -3674,7 +3846,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
CCInfo.getNextStackOffset(), true));
- FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
+ FuncInfo->setVarArgsFrameIndex(
+ MFI.CreateStackObject(Depth, Align(8), false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
@@ -3821,11 +3994,13 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// We re-align the argument offset for each argument, except when using the
// fast calling convention, when we need to make sure we do that only when
// we'll actually use a stack slot.
- unsigned CurArgOffset, Align;
+ unsigned CurArgOffset;
+ Align Alignment;
auto ComputeArgOffset = [&]() {
/* Respect alignment of argument on the stack. */
- Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ Alignment =
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
CurArgOffset = ArgOffset;
};
@@ -3873,7 +4048,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
else
- FI = MFI.CreateStackObject(ArgSize, Align, false);
+ FI = MFI.CreateStackObject(ArgSize, Alignment, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// Handle aggregates smaller than 8 bytes.
@@ -4121,7 +4296,11 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
+  // The ELFv2 ABI spec states:
+ // C programs that are intended to be *portable* across different compilers
+ // and architectures must use the header file <stdarg.h> to deal with variable
+ // argument lists.
+ if (isVarArg && MFI.hasVAStart()) {
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
@@ -4529,30 +4708,67 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
-static bool
-callsShareTOCBase(const Function *Caller, SDValue Callee,
- const TargetMachine &TM) {
- // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
- // don't have enough information to determine if the caller and calle share
- // the same TOC base, so we have to pessimistically assume they don't for
- // correctness.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G)
- return false;
-
- const GlobalValue *GV = G->getGlobal();
+static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
+ const TargetMachine &TM) {
+ // It does not make sense to call callsShareTOCBase() with a caller that
+ // is PC Relative since PC Relative callers do not have a TOC.
+#ifndef NDEBUG
+ const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
+ assert(!STICaller->isUsingPCRelativeCalls() &&
+ "PC Relative callers do not have a TOC and cannot share a TOC Base");
+#endif
+
+ // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+ // don't have enough information to determine if the caller and callee share
+ // the same TOC base, so we have to pessimistically assume they don't for
+ // correctness.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G)
+ return false;
+
+ const GlobalValue *GV = G->getGlobal();
+
+ // If the callee is preemptable, then the static linker will use a plt-stub
+ // which saves the toc to the stack, and needs a nop after the call
+ // instruction to convert to a toc-restore.
+ if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
+ return false;
+
+ // Functions with PC Relative enabled may clobber the TOC in the same DSO.
+ // We may need a TOC restore in the situation where the caller requires a
+  // valid TOC but the callee is PC Relative and does not maintain one.
+ const Function *F = dyn_cast<Function>(GV);
+ const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
+
+ // If we have an Alias we can try to get the function from there.
+ if (Alias) {
+ const GlobalObject *GlobalObj = Alias->getBaseObject();
+ F = dyn_cast<Function>(GlobalObj);
+ }
+
+ // If we still have no valid function pointer we do not have enough
+ // information to determine if the callee uses PC Relative calls so we must
+ // assume that it does.
+ if (!F)
+ return false;
+
+ // If the callee uses PC Relative we cannot guarantee that the callee won't
+ // clobber the TOC of the caller and so we must assume that the two
+ // functions do not share a TOC base.
+ const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
+ if (STICallee->isUsingPCRelativeCalls())
+ return false;
+
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
- // single TOC. Since each module will be addressed with a single TOC then we
- // only need to check that caller and callee don't cross dso boundaries.
+ // single TOC.
if (CodeModel::Medium == TM.getCodeModel() ||
CodeModel::Large == TM.getCodeModel())
- return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
+ return true;
// Otherwise we need to ensure callee and caller are in the same section,
// since the linker may allocate multiple TOCs, and we don't know which
// sections will belong to the same TOC base.
-
if (!GV->isStrongDefinitionForLinker())
return false;
@@ -4567,26 +4783,6 @@ callsShareTOCBase(const Function *Caller, SDValue Callee,
return false;
}
- // If the callee might be interposed, then we can't assume the ultimate call
- // target will be in the same section. Even in cases where we can assume that
- // interposition won't happen, in any case where the linker might insert a
- // stub to allow for interposition, we must generate code as though
- // interposition might occur. To understand why this matters, consider a
- // situation where: a -> b -> c where the arrows indicate calls. b and c are
- // in the same section, but a is in a different module (i.e. has a different
- // TOC base pointer). If the linker allows for interposition between b and c,
- // then it will generate a stub for the call edge between b and c which will
- // save the TOC pointer into the designated stack slot allocated by b. If we
- // return true here, and therefore allow a tail call between b and c, that
- // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
- // pointer into the stack slot allocated by a (where the a -> b stub saved
- // a's TOC base pointer). If we're not considering a tail call, but rather,
- // whether a nop is needed after the call instruction in b, because the linker
- // will insert a stub, it might complain about a missing nop if we omit it
- // (although many don't complain in this case).
- if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
- return false;
-
return true;
}
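The subtle step in the rewritten callsShareTOCBase() is looking through a
GlobalAlias to reach the callee Function before querying its subtarget.
Condensed into a helper, the lookup reads as follows (a sketch using the same
API calls as the patch, not code from the patch itself):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // Resolve a call target to a Function, looking through one alias level.
    // nullptr means "unknown": the caller must then conservatively assume the
    // callee may use PC Relative calls and clobber the TOC.
    static const Function *resolveCalleeFunction(const GlobalValue *GV) {
      if (const auto *F = dyn_cast<Function>(GV))
        return F;
      if (const auto *A = dyn_cast<GlobalAlias>(GV))
        return dyn_cast_or_null<Function>(A->getBaseObject());
      return nullptr;
    }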
@@ -4628,13 +4824,12 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
return false;
}
-static bool
-hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
- if (CS.arg_size() != CallerFn->arg_size())
+static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
+ if (CB.arg_size() != CallerFn->arg_size())
return false;
- ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
- ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
+ auto CalleeArgIter = CB.arg_begin();
+ auto CalleeArgEnd = CB.arg_end();
Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
@@ -4676,15 +4871,10 @@ areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
-bool
-PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee,
- CallingConv::ID CalleeCC,
- ImmutableCallSite CS,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
if (DisableSCO && !TailCallOpt) return false;
@@ -4726,15 +4916,22 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
needStackSlotPassParameters(Subtarget, Outs))
return false;
- // No TCO/SCO on indirect call because Caller have to restore its TOC
- if (!isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee))
+ // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
+ // the caller and callee share the same TOC for TCO/SCO. If the caller and
+ // callee potentially have different TOC bases then we cannot tail call since
+ // we need to restore the TOC pointer after the call.
+ // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
+ // We cannot guarantee this for indirect calls or calls to external functions.
+ // When PC-Relative addressing is used, the concept of the TOC is no longer
+ // applicable so this check is not required.
+ // Check first for indirect calls.
+ if (!Subtarget.isUsingPCRelativeCalls() &&
+ !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
return false;
- // If the caller and callee potentially have different TOC bases then we
- // cannot tail call since we need to restore the TOC pointer after the call.
- // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
- if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
+ // Check if we share the TOC base.
+ if (!Subtarget.isUsingPCRelativeCalls() &&
+ !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@@ -4746,10 +4943,14 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// If callee use the same argument list that caller is using, then we can
// apply SCO on this case. If it is not, then we need to check if callee needs
// stack for passing arguments.
- if (!hasSameArgumentList(&Caller, CS) &&
- needStackSlotPassParameters(Subtarget, Outs)) {
+ // PC Relative tail calls may not have a CallBase.
+  // If there is no CallBase we cannot verify whether the caller and callee
+  // use the same argument list, so conservatively assume they do not.
+ if (CB && !hasSameArgumentList(&Caller, *CB) &&
+ needStackSlotPassParameters(Subtarget, Outs))
+ return false;
+ else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
return false;
- }
return true;
}
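Condensing the TOC-related gates added above: with PC Relative calls both the
symbolic-callee check and the shared-TOC-base check vanish. A predicate sketch
(names illustrative) makes the structure explicit:

    // TOC gates from IsEligibleForTailCallOptimization_64SVR4, restated.
    // With PC Relative calls there is no TOC to restore, so both gates pass.
    static bool passesTOCGates(bool UsesPCRelativeCalls, bool CalleeIsSymbolic,
                               bool SharesTOCBase) {
      if (UsesPCRelativeCalls)
        return true;
      // Otherwise the callee must be a known symbol sharing the caller's TOC.
      return CalleeIsSymbolic && SharesTOCBase;
    }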
@@ -4858,18 +5059,6 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(MF, NewRetAddr));
-
- // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
- // slot as the FP is never overwritten.
- if (Subtarget.isDarwinABI()) {
- int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
- int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
- true);
- SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
- Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), NewFPIdx));
- }
}
return Chain;
}
@@ -4904,14 +5093,6 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
Chain = SDValue(LROpOut.getNode(), 1);
-
- // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
- // slot as the FP is never overwritten.
- if (Subtarget.isDarwinABI()) {
- FPOpOut = getFramePointerFrameIndex(DAG);
- FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
- Chain = SDValue(FPOpOut.getNode(), 1);
- }
}
return Chain;
}
@@ -4926,9 +5107,9 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, false, MachinePointerInfo(),
- MachinePointerInfo());
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getNonZeroByValAlign(), false, false, false,
+ MachinePointerInfo(), MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -5079,28 +5260,37 @@ static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
return true;
}
-static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
- bool isTailCall, const Function &Caller,
+// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
+static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
+ return Subtarget.isAIXABI() ||
+ (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
+}
+
+static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
+ const Function &Caller,
const SDValue &Callee,
const PPCSubtarget &Subtarget,
const TargetMachine &TM) {
- if (isTailCall)
+ if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;
// This is a call through a function pointer.
- if (isIndirectCall) {
+ if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
// indirect calls. The save of the caller's TOC pointer to the stack will be
// inserted into the DAG as part of call lowering. The restore of the TOC
// pointer is modeled by using a pseudo instruction for the call opcode that
// represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
- // slot into gpr2.
- if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
- return PPCISD::BCTRL_LOAD_TOC;
+ // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
+ // as it is not saved or used.
+ return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+ : PPCISD::BCTRL;
+ }
- // An indirect call that does not need a TOC restore.
- return PPCISD::BCTRL;
+ if (Subtarget.isUsingPCRelativeCalls()) {
+ assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
+ return PPCISD::CALL_NOTOC;
}
  // The ABIs that maintain a TOC pointer across calls need to have a nop
@@ -5118,14 +5308,6 @@ static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
return PPCISD::CALL;
}
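getCallOpcode() now reduces to a small decision tree over the call flags.
Restated as standalone C++ (PPCISD opcodes stubbed as an enum; NeedsNopAfterCall
stands in for the TOC/nop logic this hunk elides):

    enum class CallOpc {
      TC_RETURN, BCTRL_LOAD_TOC, BCTRL, CALL_NOTOC, CALL_NOP, CALL
    };

    // Mirrors the order of checks in getCallOpcode() above.
    static CallOpc pickCallOpcode(bool IsTailCall, bool IsIndirect,
                                  bool TOCSaveRestoreRequired,
                                  bool UsesPCRelativeCalls,
                                  bool NeedsNopAfterCall) {
      if (IsTailCall)
        return CallOpc::TC_RETURN;
      if (IsIndirect) // bctrl, plus a TOC reload afterwards when required
        return TOCSaveRestoreRequired ? CallOpc::BCTRL_LOAD_TOC
                                      : CallOpc::BCTRL;
      if (UsesPCRelativeCalls) // no TOC to save or restore
        return CallOpc::CALL_NOTOC;
      return NeedsNopAfterCall ? CallOpc::CALL_NOP : CallOpc::CALL;
    }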
-static bool isValidAIXExternalSymSDNode(StringRef SymName) {
- return StringSwitch<bool>(SymName)
- .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf",
- "__moddi3", "__udivdi3", "__umoddi3", true)
- .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true)
- .Default(false);
-}
-
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const SDLoc &dl, const PPCSubtarget &Subtarget) {
if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
@@ -5161,14 +5343,14 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
- if (IsDeclaration && !S->hasContainingCsect()) {
+ if (IsDeclaration && !S->hasRepresentedCsectSet()) {
// On AIX, an undefined symbol needs to be associated with a
// MCSectionXCOFF to get the correct storage mapping class.
// In this case, XCOFF::XMC_PR.
MCSectionXCOFF *Sec = Context.getXCOFFSection(
- S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
+ S->getSymbolTableName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
SectionKind::getMetadata());
- S->setContainingCsect(Sec);
+ S->setRepresentedCsect(Sec);
}
MVT PtrVT =
@@ -5209,12 +5391,7 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
SC);
}
- // TODO: Remove this when the support for ExternalSymbolSDNode is complete.
- if (isValidAIXExternalSymSDNode(SymName)) {
- return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
- }
-
- report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName));
+ return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
}
// No transformation needed.
@@ -5252,7 +5429,7 @@ static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
SDValue &Glue, SDValue &Chain,
SDValue CallSeqStart,
- ImmutableCallSite CS, const SDLoc &dl,
+ const CallBase *CB, const SDLoc &dl,
bool hasNest,
const PPCSubtarget &Subtarget) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
@@ -5288,7 +5465,7 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
MachineMemOperand::MOInvariant)
: MachineMemOperand::MONone;
- MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+ MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
// Registers used in building the DAG.
const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
@@ -5342,12 +5519,12 @@ static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
}
static void
-buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
- const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+buildCallOperands(SmallVectorImpl<SDValue> &Ops,
+ PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
+ SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
- const PPCSubtarget &Subtarget, bool isIndirect) {
+ const PPCSubtarget &Subtarget) {
const bool IsPPC64 = Subtarget.isPPC64();
// MVT for a general purpose register.
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
@@ -5356,10 +5533,10 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
Ops.push_back(Chain);
// If it's a direct call pass the callee as the second operand.
- if (!isIndirect)
+ if (!CFlags.IsIndirect)
Ops.push_back(Callee);
else {
- assert(!isPatchPoint && "Patch point call are not indirect.");
+ assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
// For the TOC based ABIs, we have saved the TOC pointer to the linkage area
// on the stack (this would have been done in `LowerCall_64SVR4` or
@@ -5368,7 +5545,9 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
// pointer from the linkage area. The operand for the TOC restore is an add
// of the TOC save offset to the stack pointer. This must be the second
// operand: after the chain input but before any other variadic arguments.
- if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
+ // saved or used.
+ if (isTOCSaveRestoreRequired(Subtarget)) {
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
@@ -5379,18 +5558,18 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
}
// Add the register used for the environment pointer.
- if (Subtarget.usesFunctionDescriptors() && !hasNest)
+ if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
RegVT));
// Add CTR register as callee so a bctr can be emitted later.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
}
// If this is a tail call add stack pointer delta.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
@@ -5402,17 +5581,18 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
// no way to mark dependencies as implicit here.
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
- if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+ if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
+ !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && Subtarget.is32BitELFABI())
+ if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
- TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -5422,44 +5602,47 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
}
SDValue PPCTargetLowering::FinishCall(
- CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+ SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
- if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+ if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
+ Subtarget.isAIXABI())
setUsesTOCBasePtr(DAG);
- const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
- unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
- DAG.getMachineFunction().getFunction(),
- Callee, Subtarget, DAG.getTarget());
+ unsigned CallOpc =
+ getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
+ Subtarget, DAG.getTarget());
- if (!isIndirect)
+ if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
else if (Subtarget.usesFunctionDescriptors())
- prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
- dl, hasNest, Subtarget);
+ prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
+ dl, CFlags.HasNest, Subtarget);
else
prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
// Build the operand list for the call instruction.
SmallVector<SDValue, 8> Ops;
- buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
- Subtarget, isIndirect);
+ buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
+ SPDiff, Subtarget);
// Emit tail call.
- if (isTailCall) {
+ if (CFlags.IsTailCall) {
+    // Indirect tail calls using PC Relative addressing do not have the same
+    // constraints.
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
- isa<ConstantSDNode>(Callee)) &&
- "Expecting a global address, external symbol, absolute value or "
- "register");
+ isa<ConstantSDNode>(Callee) ||
+ (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
+ "Expecting a global address, external symbol, absolute value, "
+ "register or an indirect tail call when PC Relative calls are "
+ "used.");
+    // PC Relative calls also use TC_RETURN to mark tail calls.
assert(CallOpc == PPCISD::TC_RETURN &&
"Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
@@ -5468,12 +5651,13 @@ SDValue PPCTargetLowering::FinishCall(
std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
Glue = Chain.getValue(1);
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCFrameLowering::eliminateCallFramePseudoInstr.
- int BytesCalleePops = (CallConv == CallingConv::Fast &&
+ int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt)
? NumBytes
: 0;
@@ -5483,7 +5667,8 @@ SDValue PPCTargetLowering::FinishCall(
Glue, dl);
Glue = Chain.getValue(1);
- return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
+ DAG, InVals);
}
SDValue
@@ -5500,15 +5685,14 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool isPatchPoint = CLI.IsPatchPoint;
- ImmutableCallSite CS = CLI.CS;
+ const CallBase *CB = CLI.CB;
if (isTailCall) {
- if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
+ if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
isTailCall = false;
else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- isTailCall =
- IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
- isVarArg, Outs, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization_64SVR4(
+ Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
else
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
@@ -5517,21 +5701,23 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
++NumSiblingCalls;
- assert(isa<GlobalAddressSDNode>(Callee) &&
+ // PC Relative calls no longer guarantee that the callee is a Global
+ // Address Node. The callee could be an indirect tail call in which
+ // case the SDValue for the callee could be a load (to load the address
+ // of a function pointer) or it may be a register copy (to move the
+ // address of the callee from a function parameter into a virtual
+      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
+ assert((Subtarget.isUsingPCRelativeCalls() ||
+ isa<GlobalAddressSDNode>(Callee)) &&
"Callee should be an llvm::Function object.");
- LLVM_DEBUG(
- const GlobalValue *GV =
- cast<GlobalAddressSDNode>(Callee)->getGlobal();
- const unsigned Width =
- 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
- dbgs() << "TCO caller: "
- << left_justify(DAG.getMachineFunction().getName(), Width)
- << ", callee linkage: " << GV->getVisibility() << ", "
- << GV->getLinkage() << "\n");
+
+ LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
+ << "\nTCO callee: ");
+ LLVM_DEBUG(Callee.dump());
}
}
- if (!isTailCall && CS && CS.isMustTailCall())
+ if (!isTailCall && CB && CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -5542,42 +5728,49 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
+ CallFlags CFlags(
+ CallConv, isTailCall, isVarArg, isPatchPoint,
+ isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
+ // hasNest
+ Subtarget.is64BitELFABI() &&
+ any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
+ CLI.NoMerge);
+
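The CallFlags aggregate built above is introduced elsewhere in this patch
(PPCISelLowering.h, not shown in this hunk); its rough shape, inferred from the
uses in this file, is:

    #include "llvm/IR/CallingConv.h"

    // Inferred sketch of PPCTargetLowering::CallFlags; the authoritative
    // definition lives in PPCISelLowering.h and may differ in detail.
    struct CallFlags {
      llvm::CallingConv::ID CallConv;
      bool IsTailCall, IsVarArg, IsPatchPoint, IsIndirect, HasNest, NoMerge;

      CallFlags(llvm::CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
                bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
          : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
            IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
            HasNest(HasNest), NoMerge(NoMerge) {}
    };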
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
if (Subtarget.isSVR4ABI())
- return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
if (Subtarget.isAIXABI())
- return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
- return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
+ return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
+ InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
+ const CallingConv::ID CallConv = CFlags.CallConv;
+ const bool IsVarArg = CFlags.IsVarArg;
+ const bool IsTailCall = CFlags.IsTailCall;
+
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
- unsigned PtrByteSize = 4;
+ const Align PtrAlign(4);
MachineFunction &MF = DAG.getMachineFunction();
@@ -5596,15 +5789,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
- PtrByteSize);
+ PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeCallOperands(Outs);
- if (isVarArg) {
+ if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
@@ -5639,10 +5832,10 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());
+ CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
@@ -5653,7 +5846,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
@@ -5749,7 +5942,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
assert(VA.isMemLoc());
unsigned LocMemOffset = VA.getLocMemOffset();
- if (!isTailCall) {
+ if (!IsTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);
@@ -5778,7 +5971,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
- if (isVarArg) {
+ if (IsVarArg) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, InFlag };
@@ -5788,14 +5981,12 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
InFlag = Chain.getValue(1);
}
- if (isTailCall)
+ if (IsTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Copy an argument into memory, being careful to do this outside the
@@ -5816,25 +6007,24 @@ SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
}
SDValue PPCTargetLowering::LowerCall_64SVR4(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
- bool hasNest = false;
bool IsSibCall = false;
+ bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
- if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
+ if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
IsSibCall = true;
// Mark this function as potentially containing a function that contains a
@@ -5842,11 +6032,10 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ assert(!(IsFastCall && CFlags.IsVarArg) &&
"fastcc not supported on varargs functions");
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -5876,7 +6065,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// can be passed to the callee in registers.
// For the fast calling convention, there is another check below.
// Note: We should keep consistent with LowerFormalArguments_64SVR4()
- bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
+ bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
if (!HasParameterArea) {
unsigned ParamAreaSize = NumGPRs * PtrByteSize;
unsigned AvailableFPRs = NumFPRs;
@@ -5898,7 +6087,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Avoid allocating parameter area for fastcc functions if all the arguments
// can be passed in the registers.
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
HasParameterArea = false;
// Add up all the space actually used.
@@ -5910,7 +6099,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Flags.isNest())
continue;
- if (CallConv == CallingConv::Fast) {
+ if (IsFastCall) {
if (Flags.isByVal()) {
NumGPRsUsed += (Flags.getByValSize()+7)/8;
if (NumGPRsUsed > NumGPRs)
@@ -5958,9 +6147,9 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
/* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+    auto Alignment =
+        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+    NumBytes = alignTo(NumBytes, Alignment);
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
if (Flags.isInConsecutiveRegsLast())
@@ -5983,8 +6172,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
- if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
int SPDiff = 0;
@@ -5992,11 +6180,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
if (!IsSibCall)
- SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
@@ -6040,16 +6228,16 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// we'll actually use a stack slot.
auto ComputePtrOff = [&]() {
/* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ auto Alignment =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = alignTo(ArgOffset, Alignment);
PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
};
- if (CallConv != CallingConv::Fast) {
+ if (!IsFastCall) {
ComputePtrOff();
/* Compute GPR index associated with argument offset. */
@@ -6080,7 +6268,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Size == 0)
continue;
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
// All aggregates smaller than 8 bytes must be passed right-justified.
@@ -6185,7 +6373,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (Flags.isNest()) {
// The 'nest' parameter, if any, is passed in R11.
RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
- hasNest = true;
break;
}
@@ -6195,18 +6382,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, false, MemOpChains,
+ true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += PtrByteSize;
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += PtrByteSize;
break;
case MVT::f32:
@@ -6220,7 +6407,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Unnamed arguments for vararg functions always go to GPRs and
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
- bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
bool NeededLoad = false;
// First load the argument into the next available FPR.
@@ -6230,7 +6417,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ else if (GPR_idx != NumGPRs && !IsFastCall) {
// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
// once we support fp <-> gpr moves.
@@ -6274,7 +6461,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (ArgVal.getNode())
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
// Single-precision floating-point values are mapped to the
@@ -6288,7 +6475,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, false, MemOpChains,
+ true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
NeededLoad = true;
@@ -6296,7 +6483,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- if (CallConv != CallingConv::Fast || NeededLoad) {
+ if (!IsFastCall || NeededLoad) {
ArgOffset += (Arg.getValueType() == MVT::f32 &&
Flags.isInConsecutiveRegs()) ? 4 : 8;
if (Flags.isInConsecutiveRegsLast())
@@ -6321,7 +6508,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// usual; unnamed arguments always go to the stack or the corresponding
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
@@ -6353,19 +6540,19 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (VR_idx != NumVRs) {
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, true, MemOpChains,
+ true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += 16;
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += 16;
break;
} // not QPX
@@ -6377,7 +6564,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4f64:
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
@@ -6409,19 +6596,19 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (QFPR_idx != NumQFPRs) {
RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
} else {
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- true, isTailCall, true, MemOpChains,
+ true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
- if (CallConv == CallingConv::Fast)
+ if (IsFastCall)
ArgOffset += (IsF32 ? 16 : 32);
}
- if (CallConv != CallingConv::Fast)
+ if (!IsFastCall)
ArgOffset += (IsF32 ? 16 : 32);
break;
}
@@ -6438,23 +6625,26 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Check if this is an indirect call (MTCTR/BCTRL).
// See prepareDescriptorIndirectCall and buildCallOperands for more
// information about calls through function pointers in the 64-bit SVR4 ABI.
- if (!isTailCall && !isPatchPoint &&
- !isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
- // Load r2 into a virtual register and store it to the TOC save area.
- setUsesTOCBasePtr(DAG);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
- // TOC save area offset.
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(
- Val.getValue(1), dl, Val, AddPtr,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+ if (CFlags.IsIndirect) {
+ // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
+ // caller in the TOC save area.
+ if (isTOCSaveRestoreRequired(Subtarget)) {
+      assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
+ // Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), TOCSaveOffset));
+ }
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
- if (isELFv2ABI && !isPatchPoint)
+ if (isELFv2ABI && !CFlags.IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
@@ -6467,23 +6657,21 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
InFlag = Chain.getValue(1);
}
- if (isTailCall && !IsSibCall)
+ if (CFlags.IsTailCall && !IsSibCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
- DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
- SPDiff, NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_Darwin(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -6498,7 +6686,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ CFlags.CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -6521,7 +6709,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
- if (!isVarArg && !isPPC64) {
+ if (!CFlags.IsVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
nAltivecParamsAtEnd++;
@@ -6548,16 +6736,16 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
- CallConv == CallingConv::Fast)
+ CFlags.CallConv == CallingConv::Fast)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
- if (isTailCall)
+ if (CFlags.IsTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
@@ -6693,7 +6881,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ isPPC64, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
}
ArgOffset += PtrByteSize;
@@ -6703,7 +6891,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
if (FPR_idx != NumFPRs) {
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
SDValue Store =
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Store);
@@ -6735,7 +6923,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
}
} else
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, false, MemOpChains,
+ isPPC64, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
if (isPPC64)
ArgOffset += 8;
@@ -6746,7 +6934,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- if (isVarArg) {
+ if (CFlags.IsVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
// V registers; in fact gcc does this only for arguments that are
@@ -6792,7 +6980,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
} else if (nAltivecParamsAtEnd==0) {
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
+ isPPC64, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
@@ -6804,7 +6992,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// don't track this here because nobody below needs it.
// If there are more Altivec parameters than fit in registers emit
// the stores here.
- if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
unsigned j = 0;
// Offset is aligned; skip 1st 12 params which go in V registers.
ArgOffset = ((ArgOffset+15)/16)*16;
@@ -6818,7 +7006,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
SDValue PtrOff;
// We are emitting Altivec params in order.
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
- isPPC64, isTailCall, true, MemOpChains,
+ isPPC64, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
ArgOffset += 16;
}
@@ -6832,12 +7020,11 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
- if (!isTailCall &&
- !isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG))
+ if (CFlags.IsIndirect) {
+ assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
PPC::R12), Callee));
+ }
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -6848,37 +7035,37 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
InFlag = Chain.getValue(1);
}
- if (isTailCall)
+ if (CFlags.IsTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State) {
+ const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+ const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+ assert((!ValVT.isInteger() ||
+ (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
+ "Integer argument exceeds register size: should have been legalized");
+
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
- if (ArgFlags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented.");
-
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
if (ValVT.isVector() || LocVT.isVector())
report_fatal_error("Vector arguments are unimplemented on AIX.");
- const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
- State.getMachineFunction().getSubtarget());
- const bool IsPPC64 = Subtarget.isPPC64();
- const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
-
static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -6886,6 +7073,38 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ if (ArgFlags.isByVal()) {
+ if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
+ report_fatal_error("Pass-by-value arguments with alignment greater than "
+ "register width are not supported.");
+
+ const unsigned ByValSize = ArgFlags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no registers,
+    // but needs a MemLoc so a stack slot is created for it on the
+    // formal-arguments side.
+ if (ByValSize == 0) {
+ State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ State.getNextStackOffset(), RegVT,
+ LocInfo));
+ return false;
+ }
+
+ const unsigned StackSize = alignTo(ByValSize, PtrAlign);
+ unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
+ for (const unsigned E = Offset + StackSize; Offset < E;
+ Offset += PtrAlign.value()) {
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ else {
+ State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
+ LocInfo));
+ break;
+ }
+ }
+ return false;
+ }
+
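    [Editor's sketch, not part of the patch] The by-value branch above rounds the
    aggregate up to the parameter save area (PSA) alignment and then hands out one
    GPR per register-width word, falling back to a single MemLoc for whatever is
    left. A minimal standalone C++ illustration of that accounting, assuming a
    10-byte aggregate on 64-bit AIX with only one argument GPR remaining:

        #include <cstdio>

        int main() {
          const unsigned PtrAlign = 8, ByValSize = 10;
          // alignTo(ByValSize, PtrAlign): 10 rounds up to 16 bytes of PSA.
          const unsigned StackSize =
              (ByValSize + PtrAlign - 1) / PtrAlign * PtrAlign;
          unsigned GPRsLeft = 1; // pretend only one argument register remains
          for (unsigned Offset = 0; Offset < StackSize; Offset += PtrAlign) {
            if (GPRsLeft) {
              --GPRsLeft;
              std::printf("word at offset %u -> register\n", Offset);
            } else {
              // A single MemLoc covers the remainder, mirroring the break above.
              std::printf("words from offset %u -> memory\n", Offset);
              break;
            }
          }
          return 0;
        }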
// Arguments always reserve parameter save area.
switch (ValVT.SimpleTy) {
default:
@@ -6895,49 +7114,55 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
assert(IsPPC64 && "PPC32 should have split i64 values.");
LLVM_FALLTHROUGH;
case MVT::i1:
- case MVT::i32:
- State.AllocateStack(PtrByteSize, PtrByteSize);
- if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
- MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- // Promote integers if needed.
- if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
- LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
- : CCValAssign::LocInfo::ZExt;
+ case MVT::i32: {
+ const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
+ // AIX integer arguments are always passed in register width.
+ if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
+ : CCValAssign::LocInfo::ZExt;
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
- }
else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- return false;
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
+ return false;
+ }
case MVT::f32:
case MVT::f64: {
// Parameter save area (PSA) is reserved even if the float passes in fpr.
const unsigned StoreSize = LocVT.getStoreSize();
// Floats are always 4-byte aligned in the PSA on AIX.
// This includes f64 in 64-bit mode for ABI compatibility.
- State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
- if (unsigned Reg = State.AllocateReg(FPR))
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
-
- // AIX requires that GPRs are reserved for float arguments.
- // Successfully reserved GPRs are only initialized for vararg calls.
- MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
- for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+ const unsigned Offset =
+ State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
+ unsigned FReg = State.AllocateReg(FPR);
+ if (FReg)
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
+
+ // Reserve and initialize GPRs or initialize the PSA as required.
+ for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ assert(FReg && "An FPR should be available when a GPR is reserved.");
if (State.isVarArg()) {
+ // Successfully reserved GPRs are only initialized for vararg calls.
// Custom handling is required for:
// f64 in PPC32 needs to be split into 2 GPRs.
// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
}
- } else if (State.isVarArg()) {
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
+ } else {
+        // If there are insufficient GPRs, the PSA needs to be initialized.
+        // For compatibility with the AIX XL compiler, initialization occurs
+        // even if an FPR was already initialized. The full memory for the
+        // argument will be initialized even if a prior word is saved in a
+        // GPR. A custom memLoc is used when the argument also passes in an
+        // FPR so that the callee handling can skip over it easily.
+ State.addLoc(
+ FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
+ LocInfo)
+ : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ break;
}
}
@@ -6982,6 +7207,64 @@ static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
+static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
+ const unsigned LASize = FL->getLinkageSize();
+
+ if (PPC::GPRCRegClass.contains(Reg)) {
+ assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
+ "Reg must be a valid argument register!");
+ return LASize + 4 * (Reg - PPC::R3);
+ }
+
+ if (PPC::G8RCRegClass.contains(Reg)) {
+ assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
+ "Reg must be a valid argument register!");
+ return LASize + 8 * (Reg - PPC::X3);
+ }
+
+ llvm_unreachable("Only general purpose registers expected.");
+}
+
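    [Editor's sketch] The offsets computed above are linkage-area-relative: each
    argument register past R3/X3 adds one register width. A hedged standalone
    version of the arithmetic, with the 24/48-byte linkage sizes taken from the
    LSA description later in this patch:

        #include <cassert>

        unsigned gprArgOffset(unsigned ArgIdx /* 0 for R3/X3 */, bool IsPPC64) {
          assert(ArgIdx < 8 && "only R3..R10 / X3..X10 carry arguments");
          const unsigned LinkageSize = IsPPC64 ? 48 : 24;
          const unsigned RegWidth = IsPPC64 ? 8 : 4;
          return LinkageSize + RegWidth * ArgIdx;
        }
        // gprArgOffset(2, /*IsPPC64=*/true) == 64: X5's home slot is SP + 64.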
+// AIX ABI Stack Frame Layout:
+//
+// Low Memory +--------------------------------------------+
+// SP +---> | Back chain | ---+
+// | +--------------------------------------------+ |
+// | | Saved Condition Register | |
+// | +--------------------------------------------+ |
+// | | Saved Linkage Register | |
+// | +--------------------------------------------+ | Linkage Area
+// | | Reserved for compilers | |
+// | +--------------------------------------------+ |
+// | | Reserved for binders | |
+// | +--------------------------------------------+ |
+// | | Saved TOC pointer | ---+
+// | +--------------------------------------------+
+// | | Parameter save area |
+// | +--------------------------------------------+
+// | | Alloca space |
+// | +--------------------------------------------+
+// | | Local variable space |
+// | +--------------------------------------------+
+// | | Float/int conversion temporary |
+// | +--------------------------------------------+
+// | | Save area for AltiVec registers |
+// | +--------------------------------------------+
+// | | AltiVec alignment padding |
+// | +--------------------------------------------+
+// | | Save area for VRSAVE register |
+// | +--------------------------------------------+
+// | | Save area for General Purpose registers |
+// | +--------------------------------------------+
+// | | Save area for Floating Point registers |
+// | +--------------------------------------------+
+// +---- | Back chain |
+// High Memory +--------------------------------------------+
+//
+// Specifications:
+// AIX 7.2 Assembler Language Reference
+// Subroutine linkage convention
+
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -6991,9 +7274,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
- if (isVarArg)
- report_fatal_error("This call type is unimplemented on AIX.");
-
if (getTargetMachine().Options.GuaranteedTailCallOpt)
report_fatal_error("Tail call support is unimplemented on AIX.");
@@ -7011,67 +7291,214 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ const EVT PtrVT = getPointerTy(MF.getDataLayout());
// Reserve space for the linkage area on the stack.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
- // On AIX a minimum of 8 words is saved to the parameter save area.
- const unsigned MinParameterSaveArea = 8 * PtrByteSize;
- CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue;
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
- if (VA.isRegLoc()) {
- EVT ValVT = VA.getValVT();
- MVT LocVT = VA.getLocVT();
+ SmallVector<SDValue, 8> MemOps;
+
+ for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
+ CCValAssign &VA = ArgLocs[I++];
+ MVT LocVT = VA.getLocVT();
+ ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+
+    // For compatibility with the AIX XL compiler, the float args in the
+    // parameter save area are initialized even if the argument is available
+    // in a register. The caller is required to initialize both the register
+    // and memory; however, the callee can choose to expect it in either.
+    // The memloc is dismissed here because the argument is retrieved from
+    // the register.
+ if (VA.isMemLoc() && VA.needsCustom())
+ continue;
+
+ if (Flags.isByVal() && VA.isMemLoc()) {
+ const unsigned Size =
+ alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
+ PtrByteSize);
+ const int FI = MF.getFrameInfo().CreateFixedObject(
+ Size, VA.getLocMemOffset(), /* IsImmutable */ false,
+ /* IsAliased */ true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ continue;
+ }
+
+ if (Flags.isByVal()) {
+ assert(VA.isRegLoc() && "MemLocs should already be handled.");
+
+ const MCPhysReg ArgReg = VA.getLocReg();
+ const PPCFrameLowering *FL = Subtarget.getFrameLowering();
+
+ if (Flags.getNonZeroByValAlign() > PtrByteSize)
+ report_fatal_error("Over aligned byvals not supported yet.");
+
+ const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
+ const int FI = MF.getFrameInfo().CreateFixedObject(
+ StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
+ /* IsAliased */ true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+
+ // Add live ins for all the RegLocs for the same ByVal.
+ const TargetRegisterClass *RegClass =
+ IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+
+ auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
+ unsigned Offset) {
+ const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
+        // Since the caller's side has left-justified the aggregate in the
+        // register, we can simply store the entire register into the stack
+        // slot.
+ SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+        // The store to the fixed-stack object is needed because accessing a
+        // field of the ByVal will use a GEP and load. Ideally we would
+        // optimize to extract the value from the register directly and elide
+        // the stores when the argument's address is not taken, but that is
+        // future work.
+ SDValue Store =
+ DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
+ DAG.getObjectPtrOffset(dl, FIN, Offset),
+ MachinePointerInfo::getFixedStack(MF, FI, Offset));
+
+ MemOps.push_back(Store);
+ };
+
+ unsigned Offset = 0;
+ HandleRegLoc(VA.getLocReg(), Offset);
+ Offset += PtrByteSize;
+ for (; Offset != StackSize && ArgLocs[I].isRegLoc();
+ Offset += PtrByteSize) {
+ assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+ "RegLocs should be for ByVal argument.");
+
+ const CCValAssign RL = ArgLocs[I++];
+ HandleRegLoc(RL.getLocReg(), Offset);
+ }
+
+ if (Offset != StackSize) {
+ assert(ArgLocs[I].getValNo() == VA.getValNo() &&
+ "Expected MemLoc for remaining bytes.");
+ assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
+      // Consume the MemLoc. The InVal has already been emitted, so nothing
+      // more needs to be done.
+ ++I;
+ }
+
+ continue;
+ }
+
+ EVT ValVT = VA.getValVT();
+ if (VA.isRegLoc() && !VA.needsCustom()) {
MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
unsigned VReg =
MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
- ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
if (ValVT.isScalarInteger() &&
(ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
ArgValue =
truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
}
InVals.push_back(ArgValue);
- } else {
- report_fatal_error("Handling of formal arguments on the stack is "
- "unimplemented!");
+ continue;
+ }
+ if (VA.isMemLoc()) {
+ const unsigned LocSize = LocVT.getStoreSize();
+ const unsigned ValSize = ValVT.getStoreSize();
+ assert((ValSize <= LocSize) &&
+ "Object size is larger than size of MemLoc");
+ int CurArgOffset = VA.getLocMemOffset();
+ // Objects are right-justified because AIX is big-endian.
+ if (LocSize > ValSize)
+ CurArgOffset += LocSize - ValSize;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ const bool IsImmutable =
+ !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue ArgValue =
+ DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
+ InVals.push_back(ArgValue);
+ continue;
}
}
+ // On AIX a minimum of 8 words is saved to the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize;
// Area that is at least reserved in the caller of this function.
- unsigned MinReservedArea = CCInfo.getNextStackOffset();
+ unsigned CallerReservedArea =
+ std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so
// that taking the difference between two stack areas will result in an
// aligned stack.
- MinReservedArea =
- EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ CallerReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- FuncInfo->setMinReservedArea(MinReservedArea);
+ FuncInfo->setMinReservedArea(CallerReservedArea);
+
+ if (isVarArg) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+
+ static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by
+ // dereferencing the result of va_next.
+ for (unsigned GPRIndex =
+ (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
+ GPRIndex < NumGPArgRegs; ++GPRIndex) {
+
+ const unsigned VReg =
+ IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
+ : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
+ MemOps.push_back(Store);
+ // Increment the address for the next argument to store.
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
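    [Editor's sketch] The vararg loop above starts spilling at the first argument
    register not consumed by named parameters. A hedged restatement of the start
    index, with an example value:

        unsigned firstVarArgGPRIndex(unsigned NextStackOffset,
                                     unsigned LinkageSize,
                                     unsigned PtrByteSize) {
          // e.g. (72 - 48) / 8 == 3 on PPC64: X6..X10 get stored to the stack.
          return (NextStackOffset - LinkageSize) / PtrByteSize;
        }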
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
SDValue PPCTargetLowering::LowerCall_AIX(
- SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const {
+ const CallBase *CB) const {
+ // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
+ // AIX ABI stack frame layout.
- assert((CallConv == CallingConv::C ||
- CallConv == CallingConv::Cold ||
- CallConv == CallingConv::Fast) && "Unexpected calling convention!");
+ assert((CFlags.CallConv == CallingConv::C ||
+ CFlags.CallConv == CallingConv::Cold ||
+ CFlags.CallConv == CallingConv::Fast) &&
+ "Unexpected calling convention!");
- if (isPatchPoint)
+ if (CFlags.IsPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
const PPCSubtarget& Subtarget =
@@ -7083,7 +7510,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+ CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage save area (LSA) on the stack.
// In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
@@ -7091,8 +7519,9 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
const bool IsPPC64 = Subtarget.isPPC64();
+ const EVT PtrVT = getPointerTy(DAG.getDataLayout());
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
- CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
@@ -7102,7 +7531,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
- const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
+ const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
+ CCInfo.getNextStackOffset());
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
@@ -7110,77 +7540,192 @@ SDValue PPCTargetLowering::LowerCall_AIX(
SDValue CallSeqStart = Chain;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Set up a copy of the stack pointer for loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
+ : DAG.getRegister(PPC::R1, MVT::i32);
for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
- CCValAssign &VA = ArgLocs[I++];
+ const unsigned ValNo = ArgLocs[I].getValNo();
+ SDValue Arg = OutVals[ValNo];
+ ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
+
+ if (Flags.isByVal()) {
+ const unsigned ByValSize = Flags.getByValSize();
- if (VA.isMemLoc())
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- if (!VA.isRegLoc())
- report_fatal_error(
- "Unexpected non-register location for function call argument.");
+ // Nothing to do for zero-sized ByVals on the caller side.
+ if (!ByValSize) {
+ ++I;
+ continue;
+ }
- SDValue Arg = OutVals[VA.getValNo()];
+ auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
+ return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
+ (LoadOffset != 0)
+ ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
+ : Arg,
+ MachinePointerInfo(), VT);
+ };
- if (!VA.needsCustom()) {
- switch (VA.getLocInfo()) {
- default:
- report_fatal_error("Unexpected argument extension type.");
- case CCValAssign::Full:
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
+ unsigned LoadOffset = 0;
+
+      // Initialize the registers that are fully occupied by the by-val argument.
+ while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
+ SDValue Load = GetLoad(PtrVT, LoadOffset);
+ MemOpChains.push_back(Load.getValue(1));
+ LoadOffset += PtrByteSize;
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ assert(ByValVA.getValNo() == ValNo &&
+ "Unexpected location for pass-by-value argument.");
+ RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
+ }
+
+ if (LoadOffset == ByValSize)
+ continue;
+
+ // There must be one more loc to handle the remainder.
+ assert(ArgLocs[I].getValNo() == ValNo &&
+ "Expected additional location for by-value argument.");
+
+ if (ArgLocs[I].isMemLoc()) {
+ assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ ISD::ArgFlagsTy MemcpyFlags = Flags;
+        // Only memcpy the bytes that don't pass in registers.
+ MemcpyFlags.setByValSize(ByValSize - LoadOffset);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(
+ (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, LoadOffset)
+ : Arg,
+ DAG.getObjectPtrOffset(dl, StackPtr, ByValVA.getLocMemOffset()),
+ CallSeqStart, MemcpyFlags, DAG, dl);
+ continue;
+ }
+
+ // Initialize the final register residue.
+ // Any residue that occupies the final by-val arg register must be
+ // left-justified on AIX. Loads must be a power-of-2 size and cannot be
+      // larger than the ByValSize. For example: a 7-byte by-val arg requires
+      // 4-, 2- and 1-byte loads.
+ const unsigned ResidueBytes = ByValSize % PtrByteSize;
+ assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
+ "Unexpected register residue for by-value argument.");
+ SDValue ResidueVal;
+ for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
+ const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
+ const MVT VT =
+ N == 1 ? MVT::i8
+ : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
+ SDValue Load = GetLoad(VT, LoadOffset);
+ MemOpChains.push_back(Load.getValue(1));
+ LoadOffset += N;
+ Bytes += N;
+
+        // By-val arguments are passed left-justified in registers.
+        // Every load here needs to be shifted; otherwise a full register load
+        // would have been used.
+ assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
+ "Unexpected load emitted during handling of pass-by-value "
+ "argument.");
+ unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
+ EVT ShiftAmountTy =
+ getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
+ SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
+ SDValue ShiftedLoad =
+ DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
+ ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
+ ShiftedLoad)
+ : ShiftedLoad;
}
+
+ const CCValAssign &ByValVA = ArgLocs[I++];
+ RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
+ continue;
+ }
+
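    [Editor's sketch, not part of the patch] For concreteness, a runnable C++
    illustration of the power-of-2 decomposition and left-justifying shifts the
    residue loop above emits for a 7-byte by-value argument on PPC64
    (PowerOf2Floor is open-coded here):

        #include <cstdio>

        int main() {
          const unsigned RegBits = 64, Residue = 7;
          for (unsigned Bytes = 0; Bytes != Residue;) {
            unsigned N = Residue - Bytes;
            while (N & (N - 1)) // PowerOf2Floor: clear low set bits
              N &= N - 1;
            Bytes += N;
            std::printf("load %u byte(s), shl by %u\n", N, RegBits - Bytes * 8);
          }
          // Prints loads of 4, 2 and 1 bytes shifted left by 32, 16 and 8;
          // OR'd together, the 7 bytes sit left-justified in the register.
          return 0;
        }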
+ CCValAssign &VA = ArgLocs[I++];
+ const MVT LocVT = VA.getLocVT();
+ const MVT ValVT = VA.getValVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ report_fatal_error("Unexpected argument extension type.");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc() && !VA.needsCustom()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ if (VA.isMemLoc()) {
+ SDValue PtrOff =
+ DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
continue;
}
// Custom handling is used for GPR initializations for vararg float
// arguments.
- assert(isVarArg && VA.getValVT().isFloatingPoint() &&
- VA.getLocVT().isInteger() &&
- "Unexpected custom register handling for calling convention.");
+ assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
+ ValVT.isFloatingPoint() && LocVT.isInteger() &&
+ "Unexpected register handling for calling convention.");
SDValue ArgAsInt =
- DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
+ DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
- if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
- else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
- VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
else {
// f64 in two 32-bit GPRs
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
- assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+ assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
"Unexpected custom register for argument!");
CCValAssign &GPR1 = VA;
SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
DAG.getConstant(32, dl, MVT::i8));
RegsToPass.push_back(std::make_pair(
GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
- assert(I != E && "A second custom GPR is expected!");
- CCValAssign &GPR2 = ArgLocs[I++];
- assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
- GPR2.needsCustom() && "A second custom GPR is expected!");
- RegsToPass.push_back(std::make_pair(
- GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+
+ if (I != E) {
+ // If only 1 GPR was available, there will only be one custom GPR and
+ // the argument will also pass in memory.
+ CCValAssign &PeekArg = ArgLocs[I];
+      if (PeekArg.isRegLoc() && PeekArg.getValNo() == GPR1.getValNo()) {
+ assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
+ CCValAssign &GPR2 = ArgLocs[I++];
+ RegsToPass.push_back(std::make_pair(
+ GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+ }
+ }
}
}
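    [Editor's sketch] A standalone C++ analogue of the vararg f64-to-two-GPRs
    split handled above on PPC32 (plain scalar code, not the DAG nodes):

        #include <cstdint>
        #include <cstring>

        void splitF64(double D, uint32_t &Hi, uint32_t &Lo) {
          uint64_t Bits;
          std::memcpy(&Bits, &D, sizeof Bits); // the DAG.getBitcast step
          Hi = uint32_t(Bits >> 32); // first custom GPR (SRL by 32, truncate)
          Lo = uint32_t(Bits);       // second custom GPR (plain truncate)
        }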
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
// For indirect calls, we need to save the TOC base to the stack for
// restoration after the call.
- if (!isTailCall && !isPatchPoint &&
- !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+ if (CFlags.IsIndirect) {
+ assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -7206,10 +7751,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
}
const int SPDiff = 0;
- return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
- InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
- InVals, CS);
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
bool
@@ -7281,25 +7824,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const MCPhysReg *I =
- TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
- if (I) {
- for (; *I; ++I) {
-
- if (PPC::G8RCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::i64));
- else if (PPC::F8RCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
- else if (PPC::CRRCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::i1));
- else if (PPC::VRRCRegClass.contains(*I))
- RetOps.push_back(DAG.getRegister(*I, MVT::Other));
- else
- llvm_unreachable("Unexpected register class in CSRsViaCopy!");
- }
- }
-
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -7401,6 +7925,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -7413,9 +7938,10 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getConstant(0, dl, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
- // Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ if (hasInlineStackProbe(MF))
+ return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
@@ -7564,15 +8090,6 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
- bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
- bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
- // We might be able to do better than this under some circumstances, but in
- // general, fsel-based lowering of select is a finite-math-only optimization.
- // For more information, see section F.3 of the 2.06 ISA specification.
- // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
- // presence of infinities.
- if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
- return Op;
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -7580,14 +8097,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ // We have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // presence of infinities.
if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
switch (CC) {
default:
- // Not a min/max but with finite math, we may still be able to use fsel.
- if (HasNoInfs && HasNoNaNs)
- break;
- return Op;
+ break;
case ISD::SETOGT:
case ISD::SETGT:
return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
@@ -7597,10 +8114,13 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
}
- // TODO: Propagate flags from the select rather than global settings.
- SDNodeFlags Flags;
- Flags.setNoInfs(true);
- Flags.setNoNaNs(true);
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+  // With ISA 3.0, the xsmaxcdp/xsmincdp min/max cases above are handled
+  // without this restriction.
+ if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
+ (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
+ return Op;
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
@@ -7720,15 +8240,17 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
// Emit a store to the stack slot.
SDValue Chain;
+ Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
+ Alignment = Align(4);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
- Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, Alignment);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias on big endian.
@@ -7741,6 +8263,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.Chain = Chain;
RLI.Ptr = FIPtr;
RLI.MPI = MPI;
+ RLI.Alignment = Alignment;
}
/// Custom lowers floating point to integer conversions to use
@@ -7782,7 +8305,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
// FP to INT conversions are legal for f128.
- if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
+ if (Op->getOperand(0).getValueType() == MVT::f128)
return Op;
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
@@ -7848,9 +8371,10 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
SelectionDAG &DAG,
ISD::LoadExtType ET) const {
SDLoc dl(Op);
+ bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
+ (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
if (ET == ISD::NON_EXTLOAD &&
- (Op.getOpcode() == ISD::FP_TO_UINT ||
- Op.getOpcode() == ISD::FP_TO_SINT) &&
+ (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
isOperationLegalOrCustom(Op.getOpcode(),
Op.getOperand(0).getValueType())) {
@@ -7877,7 +8401,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
RLI.MPI = LD->getPointerInfo();
RLI.IsDereferenceable = LD->isDereferenceable();
RLI.IsInvariant = LD->isInvariant();
- RLI.Alignment = LD->getAlignment();
+ RLI.Alignment = LD->getAlign();
RLI.AAInfo = LD->getAAInfo();
RLI.Ranges = LD->getRanges();
@@ -8021,16 +8545,19 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
SDValue ShuffleSrc2 =
SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
- unsigned ExtendOp =
- SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
SDValue Extend;
- if (!Subtarget.hasP9Altivec() && SignedConv) {
+ if (SignedConv) {
Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ EVT ExtVT = Op.getOperand(0).getValueType();
+ if (Subtarget.hasP9Altivec())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
+ IntermediateVT.getVectorNumElements());
+
Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
- DAG.getValueType(Op.getOperand(0).getValueType()));
+ DAG.getValueType(ExtVT));
} else
- Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
@@ -8046,7 +8573,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
return LowerINT_TO_FPVector(Op, DAG, dl);
// Conversions to f128 are legal.
- if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
+ if (Op.getValueType() == MVT::f128)
return Op;
if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
@@ -8141,8 +8668,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SINT, DAG.getConstant(53, dl, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
Cond, DAG.getConstant(1, dl, MVT::i64));
- Cond = DAG.getSetCC(dl, MVT::i32,
- Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
+ Cond = DAG.getSetCC(
+ dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
+ Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
@@ -8183,7 +8712,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIdx = MFI.CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -8198,7 +8727,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
RLI.Chain = Store;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- RLI.Alignment = 4;
+ RLI.Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
@@ -8235,7 +8764,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
bool ReusingLoad;
if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
DAG))) {
- int FrameIdx = MFI.CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -8250,7 +8779,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
RLI.Chain = Store;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
- RLI.Alignment = 4;
+ RLI.Alignment = Align(4);
}
MachineMemOperand *MMO =
@@ -8267,7 +8796,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
- int FrameIdx = MFI.CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
@@ -8319,22 +8848,20 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
- EVT NodeTys[] = {
- MVT::f64, // return register
- MVT::Glue // unused in this context
- };
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
+ SDValue Chain = Op.getOperand(0);
+ SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
+ Chain = MFFS.getValue(1);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
- MachinePointerInfo());
+ Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
+ Chain = CWD.getValue(1);
// Transform as necessary
SDValue CWD1 =
@@ -8351,8 +8878,11 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
- return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+ RetVal =
+ DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
+ dl, VT, RetVal);
+
+ return DAG.getMergeValues({RetVal, Chain}, dl);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -8446,19 +8976,21 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
// Vector related lowering.
//
-/// BuildSplatI - Build a canonical splati of Val with an element size of
-/// SplatSize. Cast the result to VT.
-static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
- SelectionDAG &DAG, const SDLoc &dl) {
+/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
+/// element size of SplatSize. Cast the result to VT.
+static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, const SDLoc &dl) {
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
- // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
- if (Val == -1)
+ // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
+  if (Val == ((1ULL << (SplatSize * 8)) - 1)) {
SplatSize = 1;
+ Val = 0xFF;
+ }
EVT CanonicalVT = VTys[SplatSize-1];
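    [Editor's note] The canonicalization above folds every all-ones splat to a
    byte splat: a v4i32 splat of 0xFFFFFFFF satisfies Val == (1ULL << 32) - 1, so
    it is re-expressed as a v16i8 splat of 0xFF and a single vspltisb can
    materialize it. The predicate in isolation:

        #include <cstdint>

        bool isAllOnesSplat(uint64_t Val, unsigned SplatSize /* 1, 2 or 4 */) {
          return Val == ((1ULL << (SplatSize * 8)) - 1);
        }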
@@ -8569,10 +9101,9 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
- if (!EnableQuadPrecision ||
- (Op.getValueType() != MVT::f128 ) ||
+ if ((Op.getValueType() != MVT::f128) ||
(Op0.getOpcode() != ISD::BUILD_PAIR) ||
- (Op0.getOperand(0).getValueType() != MVT::i64) ||
+ (Op0.getOperand(0).getValueType() != MVT::i64) ||
(Op0.getOperand(1).getValueType() != MVT::i64))
return SDValue();
@@ -8584,7 +9115,8 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
const SDValue *InputLoad = &Op;
if (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
- if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
@@ -8592,6 +9124,34 @@ static const SDValue *getNormalLoadInput(const SDValue &Op) {
return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}
+// Convert the argument APFloat to single precision if the conversion loses no
+// information and the resulting number is not a denormal. Return true if
+// successful.
+bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
+ APFloat APFloatToConvert = ArgAPFloat;
+ bool LosesInfo = true;
+ APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+ bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
+ if (Success)
+ ArgAPFloat = APFloatToConvert;
+ return Success;
+}
+
+// Bitcast the argument APInt to a double and convert it to single precision.
+// If the conversion loses no information and the result is not a denormal,
+// bitcast the single-precision APFloat back to an APInt and assign it to the
+// original argument. Return true if successful.
+bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
+ double DpValue = ArgAPInt.bitsToDouble();
+ APFloat APFloatDp(DpValue);
+ bool Success = convertToNonDenormSingle(APFloatDp);
+ if (Success)
+ ArgAPInt = APFloatDp.bitcastToAPInt();
+ return Success;
+}
+
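    [Editor's sketch] A hedged usage example of the two helpers above (assumes
    LLVM's APFloat header and the declarations introduced in this patch are
    visible; the sample values are illustrative):

        #include "llvm/ADT/APFloat.h"

        bool demo() {
          llvm::APFloat A(0.5);     // exact in single precision -> shrunk in place
          llvm::APFloat B(0.1);     // loses bits in single precision -> rejected
          llvm::APFloat C(1.0e-40); // becomes a single-precision denormal -> rejected
          return llvm::convertToNonDenormSingle(A) &&
                 !llvm::convertToNonDenormSingle(B) &&
                 !llvm::convertToNonDenormSingle(C);
        }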
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8608,7 +9168,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// then convert it to a floating-point vector and compare it
// to a zero vector to get the boolean result.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8643,8 +9203,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
- 16 /* alignment */);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), Align(16));
SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
@@ -8711,9 +9271,23 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32) {
+ bool BVNIsConstantSplat =
+ BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+
+  // If it is a splat of a double, check if we can shrink it to a 32-bit
+  // non-denormal float which, when converted back to double, gives us the
+  // same double. This is to exploit the XXSPLTIDP instruction.
+ if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
+ (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
+ convertToNonDenormSingle(APSplatBits)) {
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ }
+
+ if (!BVNIsConstantSplat || SplatBitSize > 32) {
const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
// Handle load-and-splat patterns as we have instructions that will do this
@@ -8752,8 +9326,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
- unsigned SplatBits = APSplatBits.getZExtValue();
- unsigned SplatUndef = APSplatUndef.getZExtValue();
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ uint64_t SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
// First, handle single instruction cases.
@@ -8768,17 +9342,30 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}
- // We have XXSPLTIB for constant splats one byte wide
- // FIXME: SplatBits is an unsigned int being cast to an int while passing it
- // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here.
+ // We have XXSPLTIW for constant splats four bytes wide.
+  // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
+  // replaced with 4-byte splats: we replicate the SplatBits to make a 4-byte
+  // splat element. For example, a 2-byte splat of 0xABAB becomes a 4-byte
+  // splat of 0xABABABAB.
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
+ return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
+ Op.getValueType(), DAG, dl);
+
+ if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+ dl);
+
+ // We have XXSPLTIB for constant splats one byte wide.
if (Subtarget.hasP9Vector() && SplatSize == 1)
- return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
+ return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
+ dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
if (SextVal >= -16 && SextVal <= 15)
- return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+ return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
+ dl);
// Two instruction sequences.
@@ -8809,7 +9396,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
- SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+ SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
@@ -8837,7 +9424,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + shl self.
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
Intrinsic::ppc_altivec_vslw
@@ -8848,7 +9435,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + srl self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
Intrinsic::ppc_altivec_vsrw
@@ -8859,7 +9446,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + sra self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
Intrinsic::ppc_altivec_vsraw
@@ -8871,7 +9458,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
Intrinsic::ppc_altivec_vrlw
@@ -8882,19 +9469,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
@@ -9193,6 +9780,107 @@ SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
+/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
+/// return the default SDValue.
+SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const {
+ // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
+ // to v16i8. Peek through the bitcasts to get the actual operands.
+ SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
+ SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
+
+ auto ShuffleMask = SVN->getMask();
+ SDValue VecShuffle(SVN, 0);
+ SDLoc DL(SVN);
+
+  // Check that we have a four-byte shuffle.
+ if (!isNByteElemShuffleMask(SVN, 4, 1))
+ return SDValue();
+
+ // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
+ if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
+ std::swap(LHS, RHS);
+ VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
+ ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
+ }
+
+ // Ensure that the RHS is a vector of constants.
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+ if (!BVN)
+ return SDValue();
+
+ // Check if RHS is a splat of 4-bytes (or smaller).
+ APInt APSplatValue, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
+ return SDValue();
+
+ // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
+ // The instruction splats a constant C into two words of the source vector
+ // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
+ // Thus we check that the shuffle mask is the equivalent of
+ // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
+ // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
+ // within each word are consecutive, so we only need to check the first byte.
+ SDValue Index;
+ bool IsLE = Subtarget.isLittleEndian();
+ if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
+ (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
+ ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
+ Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
+ else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
+ (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
+ ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
+ Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
+ else
+ return SDValue();
+
+ // If the splat is narrower than 32-bits, we need to get the 32-bit value
+ // for XXSPLTI32DX.
+ unsigned SplatVal = APSplatValue.getZExtValue();
+ for (; SplatBitSize < 32; SplatBitSize <<= 1)
+ SplatVal |= (SplatVal << SplatBitSize);
+
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
+ Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
+}
+
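    [Editor's sketch] As a concrete instance of the mask check above: with
    4-byte elements over v16i8, the following mask keeps words 0 and 2 of the
    LHS (bytes 0-3 and 8-11) and takes words of the constant RHS (byte indices
    greater than 15) for words 1 and 3, producing { Unchanged, C, Unchanged, C }:

        int Mask[16] = { 0,  1,  2,  3,  16, 17, 18, 19,
                         8,  9, 10, 11,  20, 21, 22, 23 };
        // Matches the first branch: Mask[0] == 0, Mask[8] == 8, and Mask[4]
        // and Mask[12] are word-aligned RHS bytes, so Index is 0 on LE.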
+/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
+/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
+/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
+/// i.e (or (shl x, C1), (srl x, 128-C1)).
+SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
+ assert(Op.getValueType() == MVT::v1i128 &&
+ "Only set v1i128 as custom, other type shouldn't reach here!");
+ SDLoc dl(Op);
+ SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
+ SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
+ unsigned SHLAmt = N1.getConstantOperandVal(0);
+ if (SHLAmt % 8 == 0) {
+ SmallVector<int, 16> Mask(16, 0);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
+ if (SDValue Shuffle =
+ DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
+ DAG.getUNDEF(MVT::v16i8), Mask))
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
+ }
+ SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
+ SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
+ DAG.getConstant(SHLAmt, dl, MVT::i32));
+ SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
+ DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
+ SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
+}
+
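    [Editor's sketch] A runnable C++ version of the byte-shuffle mask the
    multiple-of-8 path above builds (standalone, not the DAG code):

        #include <algorithm>
        #include <numeric>
        #include <vector>

        std::vector<int> rotlByteMask(unsigned SHLAmt) { // SHLAmt % 8 == 0
          std::vector<int> Mask(16);
          std::iota(Mask.begin(), Mask.end(), 0);
          std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
          return Mask; // for SHLAmt == 24: {3, 4, ..., 15, 0, 1, 2}
        }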
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
@@ -9203,6 +9891,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Any nodes that were combined in the target-independent combiner prior
+ // to vector legalization will not be sent to the target combine. Try to
+ // combine it here.
+ if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
+ if (!isa<ShuffleVectorSDNode>(NewShuffle))
+ return NewShuffle;
+ Op = NewShuffle;
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = Op.getOperand(0);
+ V2 = Op.getOperand(1);
+ }
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -9228,6 +9928,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
+
+ // If we are loading a partial vector, it does not make sense to adjust
+ // the base pointer. This happens with (splat (s_to_v_permuted (ld))).
+ if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64))
+ Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@@ -9266,6 +9971,12 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+ if (Subtarget.hasPrefixInstrs()) {
+ SDValue SplatInsertNode;
+ if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
+ return SplatInsertNode;
+ }
+
if (Subtarget.hasP9Altivec()) {
SDValue NewISDNode;
if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
@@ -9501,7 +10212,13 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
MVT::i32));
}
+ ShufflesHandledWithVPERM++;
SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
+ LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
+ LLVM_DEBUG(SVOp->dump());
+ LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
+ LLVM_DEBUG(VPermMask.dump());
+
if (isLittleEndian)
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
V2, V1, VPermMask);
@@ -9858,18 +10575,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue();
}
-SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
- // Check for a DIV with the same operands as this REM.
- for (auto UI : Op.getOperand(1)->uses()) {
- if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
- (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
- if (UI->getOperand(0) == Op.getOperand(0) &&
- UI->getOperand(1) == Op.getOperand(1))
- return SDValue();
- }
- return Op;
-}
-
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9928,7 +10633,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -9998,7 +10703,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
Value);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -10139,9 +10844,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Stores[4];
for (unsigned Idx = 0; Idx < 4; ++Idx) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
- DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+ DAG.getVectorIdxConstant(Idx, dl));
SDValue Store;
if (ScalarVT != ScalarMemVT)
Store =
@@ -10198,7 +10902,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
Value);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(16, 16, false);
+ int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -10247,9 +10951,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
- SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
- SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
-
+ SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
+ // +16 as shift amt.
+ SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
SDValue RHSSwap = // = vrlw RHS, 16
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
@@ -10269,13 +10973,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
Neg16, DAG, dl);
return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
- } else if (Op.getValueType() == MVT::v8i16) {
- SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
-
- SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
-
- return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
- LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
bool isLittleEndian = Subtarget.isLittleEndian();
@@ -10357,6 +11054,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
+ // FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10481,6 +11179,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -10493,9 +11192,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
- case ISD::SREM:
- case ISD::UREM:
- return LowerREM(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
@@ -10514,8 +11210,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
- Results.push_back(RTB);
- Results.push_back(RTB.getValue(1));
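+ // Pair the two 32-bit halves of the time base into one i64 result.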
+ Results.push_back(
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
Results.push_back(RTB.getValue(2));
break;
}
@@ -10570,6 +11266,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
+ case ISD::FP_EXTEND:
+ SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
+ return;
}
}
@@ -11170,13 +11871,192 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
return MBB;
}
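+// A function opts in to inline stack probes via the IR attribute
+// "probe-stack"="inline-asm"; any other attribute value leaves inline
+// probing off.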
+bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
+unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
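+ // E.g. "stack-probe-size"="1000" with a 16-byte stack alignment rounds
+ // down to 1000 & ~15 == 992; a value that rounds to zero falls back to
+ // the stack alignment itself.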
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
+// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
+// into three phases. In the first phase, it uses the pseudo instruction
+// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
+// and FinalStackPtr. In the second phase, it generates a loop that probes
+// blocks of the allocation. Finally, it uses the pseudo instruction
+// DYNAREAOFFSET to get the future result of MaxCallFrameSize so that it can
+// calculate the correct data area pointer.
+MachineBasicBlock *
+PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ const bool isPPC64 = Subtarget.isPPC64();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(*MF);
+ const BasicBlock *ProbedBB = MBB->getBasicBlock();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ // The CFG of the stack-probing loop looks like:
+ //            +-----+
+ //            | MBB |
+ //            +--+--+
+ //               |
+ //          +----v----+
+ //    +---->+ TestMBB +----+
+ //    |     +----+----+    |
+ //    |          |         |
+ //    |    +-----v----+    |
+ //    +----+ BlockMBB |    |
+ //         +----------+    |
+ //                         |
+ //           +---------+   |
+ //           | TailMBB +<--+
+ //           +---------+
+ // In MBB, calculate the previous frame pointer and the final stack pointer.
+ // In TestMBB, test whether sp equals the final stack pointer; if so, jump
+ // to TailMBB. In BlockMBB, update sp atomically and jump back to TestMBB.
+ // TailMBB is spliced from MBB via \p MI.
+ MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
+ MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
+ MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
+
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
+ MF->insert(MBBIter, TestMBB);
+ MF->insert(MBBIter, BlockMBB);
+ MF->insert(MBBIter, TailMBB);
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register NegSizeReg = MI.getOperand(1).getReg();
+ Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+
+ // Get the canonical FinalStackPtr the same way
+ // PPCRegisterInfo::lowerDynamicAlloc does.
+ BuildMI(*MBB, {MI}, DL,
+ TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
+ : PPC::PREPARE_PROBED_ALLOCA_32),
+ FramePointer)
+ .addDef(FinalStackPtr)
+ .addReg(NegSizeReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+
+ // Materialize a scratch register for update.
+ int64_t NegProbeSize = -(int64_t)ProbeSize;
+ assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
+ Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ if (!isInt<16>(NegProbeSize)) {
+ Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
+ .addImm(NegProbeSize >> 16);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+ ScratchReg)
+ .addReg(TempReg)
+ .addImm(NegProbeSize & 0xFFFF);
+ } else
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
+ .addImm(NegProbeSize);
+
+ {
+ // Probe the leading residual part.
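+ // E.g. for NegSize == -4300 and ProbeSize == 4096: Div == 1, Mul == -4096,
+ // and NegMod == -204, so this store-with-update probes the residual 204
+ // bytes and the loop below covers the remaining 4096.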
+ Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
+ .addReg(NegSizeReg)
+ .addReg(ScratchReg);
+ Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
+ .addReg(Div)
+ .addReg(ScratchReg);
+ Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
+ .addReg(Mul)
+ .addReg(NegSizeReg);
+ BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FramePointer)
+ .addReg(SPReg)
+ .addReg(NegMod);
+ }
+
+ {
+ // The remaining part should be a multiple of ProbeSize.
+ Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ BuildMI(TestMBB, DL, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ)
+ .addReg(CmpResult)
+ .addMBB(TailMBB);
+ TestMBB->addSuccessor(BlockMBB);
+ TestMBB->addSuccessor(TailMBB);
+ }
+
+ {
+ // Touch the block.
+ // |P...|P...|P...
+ BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
+ .addReg(FramePointer)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
+ BlockMBB->addSuccessor(TestMBB);
+ }
+
+ // The calculation of MaxCallFrameSize is deferred to the prolog/epilog
+ // inserter; use the DYNAREAOFFSET pseudo instruction to get its future
+ // result.
+ Register MaxCallFrameSizeReg =
+ MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+ BuildMI(TailMBB, DL,
+ TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
+ MaxCallFrameSizeReg)
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
+ BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
+ .addReg(SPReg)
+ .addReg(MaxCallFrameSizeReg);
+
+ // Splice instructions after MI to TailMBB.
+ TailMBB->splice(TailMBB->end(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(TestMBB);
+
+ // Delete the pseudo instruction.
+ MI.eraseFromParent();
+
+ ++NumDynamicAllocaProbed;
+ return TailMBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
if (Subtarget.is64BitELFABI() &&
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ MI.getOpcode() == TargetOpcode::PATCHPOINT &&
+ !Subtarget.isUsingPCRelativeCalls()) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't however, because there is no
// way to mark the dependence as implicit there, and so the stackmap code
@@ -11858,12 +12738,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineFrameInfo &MFI = F->getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
MachineMemOperand *MMOStore = F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
- MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
// Store the SrcReg into the stack.
BuildMI(*BB, MI, dl, TII->get(StoreOp))
@@ -11873,9 +12753,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addMemOperand(MMOStore);
MachineMemOperand *MMOLoad = F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
- MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlign(FrameIdx));
// Load from the stack where SrcReg is stored, and save to DestReg,
// so we have done the RegClass conversion from RegClass::SrcReg to
@@ -11935,6 +12815,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
+ } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
+ MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
+ return emitProbedAlloca(MI, BB);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -13139,15 +14022,20 @@ static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
DAG.getVectorShuffle(Input.getValueType(), dl, Input,
DAG.getUNDEF(Input.getValueType()), ShuffleMask);
- EVT Ty = N->getValueType(0);
- SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
- return BV;
+ EVT VT = N->getValueType(0);
+ SDValue Conv = DAG.getBitcast(VT, Shuffle);
+
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ Input.getValueType().getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
+ DAG.getValueType(ExtVT));
}
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
-// aren't used, add a vector shuffle to fix up the indices and create a new
-// PPCISD:SExtVElems node which selects the vector sign extend instructions
+// aren't used, add a vector shuffle to fix up the indices and create
+// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
// This array encodes the indices that the vector sign extend instructions
@@ -13470,8 +14358,8 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
// Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
// aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
- && VecTy.getScalarSizeInBits() <= 32 ) {
+ if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
return SDValue();
}
@@ -13541,8 +14429,8 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
// Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
// aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
- && VecTy.getScalarSizeInBits() <= 32 ) {
+ if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
+ VecTy.getScalarSizeInBits() <= 32) {
return SDValue();
}
@@ -13588,7 +14476,7 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
(Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
- if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
+ if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
return SDValue();
@@ -13622,6 +14510,210 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
return Val;
}
+static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
+ // Check that the source of each element keeps flipping between the two
+ // input vectors (i.e. Mask[i] < NumElts implies Mask[i+1] >= NumElts, and
+ // vice versa).
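+ // E.g. with NumElts == 16, <0,16,1,17,2,18,...> keeps flipping and is
+ // accepted, while <0,1,17,18,...> repeats a source and is rejected.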
+ bool PrevElemFromFirstVec = Mask[0] < NumElts;
+ for (int i = 1, e = Mask.size(); i < e; i++) {
+ if (PrevElemFromFirstVec && Mask[i] < NumElts)
+ return false;
+ if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
+ return false;
+ PrevElemFromFirstVec = !PrevElemFromFirstVec;
+ }
+ return true;
+}
+
+static bool isSplatBV(SDValue Op) {
+ if (Op.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ SDValue FirstOp;
+
+ // Find first non-undef input.
+ for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
+ FirstOp = Op.getOperand(i);
+ if (!FirstOp.isUndef())
+ break;
+ }
+
+ // All inputs are undef or the same as the first non-undef input.
+ for (int i = 1, e = Op.getNumOperands(); i < e; i++)
+ if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
+ return false;
+ return true;
+}
+
+static SDValue isScalarToVec(SDValue Op) {
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return Op;
+ if (Op.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return Op;
+ return SDValue();
+}
+
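+// Adjust shuffle mask entries that refer to element zero of a permuted
+// SCALAR_TO_VECTOR input by HalfVec, since SCALAR_TO_VECTOR_PERMUTED leaves
+// the scalar in the least significant element of the most significant
+// doubleword. E.g. for v16i8 operands (HalfVec == 8) with LHSMaxIdx == 1,
+// a mask entry of 0 becomes 8.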
+static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
+ int LHSMaxIdx, int RHSMinIdx,
+ int RHSMaxIdx, int HalfVec) {
+ for (int i = 0, e = ShuffV.size(); i < e; i++) {
+ int Idx = ShuffV[i];
+ if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
+ ShuffV[i] += HalfVec;
+ }
+}
+
+// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
+// the original is:
+// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
+// In such a case, just change the shuffle mask to extract the element
+// from the permuted index.
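+// E.g. for v4i32, the permuted position is NumElts / 2 == 2, so
+// (scalar_to_vector (extract_elt <4 x i32> %a, 3)) just becomes
+// (vector_shuffle %a, %a, <-1,-1,3,-1>).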
+static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
+ SDLoc dl(OrigSToV);
+ EVT VT = OrigSToV.getValueType();
+ assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ "Expecting a SCALAR_TO_VECTOR here");
+ SDValue Input = OrigSToV.getOperand(0);
+
+ if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
+ SDValue OrigVector = Input.getOperand(0);
+
+ // Can't handle non-const element indices or different vector types
+ // for the input to the extract and the output of the scalar_to_vector.
+ if (Idx && VT == OrigVector.getValueType()) {
+ SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
+ NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
+ return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
+ }
+ }
+ return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
+ OrigSToV.getOperand(0));
+}
+
+// On little endian subtargets, combine shuffles such as:
+// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
+// into:
+// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
+// because the latter can be matched to a single instruction merge.
+// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
+// to put the value into element zero. Adjust the shuffle mask so that the
+// vector can remain in permuted form (to prevent a swap prior to a shuffle).
+SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const {
+ SDValue LHS = SVN->getOperand(0);
+ SDValue RHS = SVN->getOperand(1);
+ auto Mask = SVN->getMask();
+ int NumElts = LHS.getValueType().getVectorNumElements();
+ SDValue Res(SVN, 0);
+ SDLoc dl(SVN);
+
+ // None of these combines are useful on big endian systems since the ISA
+ // already has a big endian bias.
+ if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
+ return Res;
+
+ // If this is not a shuffle of a shuffle and the first element comes from
+ // the second vector, canonicalize to the commuted form. This will make it
+ // more likely to match one of the single instruction patterns.
+ if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+ RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ std::swap(LHS, RHS);
+ Res = DAG.getCommutedVectorShuffle(*SVN);
+ Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
+ }
+
+ // Adjust the shuffle mask if either input vector comes from a
+ // SCALAR_TO_VECTOR and keep the respective input vector in permuted
+ // form (to prevent the need for a swap).
+ SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
+ SDValue SToVLHS = isScalarToVec(LHS);
+ SDValue SToVRHS = isScalarToVec(RHS);
+ if (SToVLHS || SToVRHS) {
+ int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
+ : SToVRHS.getValueType().getVectorNumElements();
+ int NumEltsOut = ShuffV.size();
+
+ // Initially assume that neither input is permuted. These will be adjusted
+ // accordingly if either input is.
+ int LHSMaxIdx = -1;
+ int RHSMinIdx = -1;
+ int RHSMaxIdx = -1;
+ int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
+
+ // Get the permuted scalar to vector nodes for the source(s) that come from
+ // ISD::SCALAR_TO_VECTOR.
+ if (SToVLHS) {
+ // Set up the values for the shuffle vector fixup.
+ LHSMaxIdx = NumEltsOut / NumEltsIn;
+ SToVLHS = getSToVPermuted(SToVLHS, DAG);
+ if (SToVLHS.getValueType() != LHS.getValueType())
+ SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
+ LHS = SToVLHS;
+ }
+ if (SToVRHS) {
+ RHSMinIdx = NumEltsOut;
+ RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
+ SToVRHS = getSToVPermuted(SToVRHS, DAG);
+ if (SToVRHS.getValueType() != RHS.getValueType())
+ SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
+ RHS = SToVRHS;
+ }
+
+ // Fix up the shuffle mask to reflect where the desired element actually is.
+ // The minimum and maximum indices that correspond to element zero for both
+ // the LHS and RHS are computed and will control which shuffle mask entries
+ // are to be changed. For example, if the RHS is permuted, any shuffle mask
+ // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
+ // HalfVec to refer to the corresponding element in the permuted vector.
+ fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
+ HalfVec);
+ Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
+
+ // We may have simplified away the shuffle. We won't be able to do anything
+ // further with it here.
+ if (!isa<ShuffleVectorSDNode>(Res))
+ return Res;
+ Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
+ }
+
+ // The common case after we commuted the shuffle is that the RHS is a splat
+ // and we have elements coming in from the splat at indices that are not
+ // conducive to using a merge.
+ // Example:
+ // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
+ if (!isSplatBV(RHS))
+ return Res;
+
+ // We are looking for a mask such that all even elements are from
+ // one vector and all odd elements from the other.
+ if (!isAlternatingShuffMask(Mask, NumElts))
+ return Res;
+
+ // Adjust the mask so we are pulling in the same index from the splat
+ // as the index from the interesting vector in consecutive elements.
+ // Example (even elements from first vector):
+ // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
+ if (Mask[0] < NumElts)
+ for (int i = 1, e = Mask.size(); i < e; i += 2)
+ ShuffV[i] = (ShuffV[i - 1] + NumElts);
+ // Example (odd elements from first vector):
+ // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
+ else
+ for (int i = 0, e = Mask.size(); i < e; i += 2)
+ ShuffV[i] = (ShuffV[i + 1] + NumElts);
+
+ // If the RHS has undefs, we need to remove them since we may have created
+ // a shuffle that adds those instead of the splat value.
+ SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
+ RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
+
+ Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
+ return Res;
+}
+
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const {
@@ -13693,6 +14785,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRL(N, DCI);
case ISD::MUL:
return combineMUL(N, DCI);
+ case ISD::FMA:
+ case PPCISD::FNMSUB:
+ return combineFMALike(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -13728,7 +14823,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
}
- break;
+ return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
@@ -13935,17 +15030,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
+ Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
- unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
+ Align ScalarABIAlignment = DAG.getDataLayout().getABITypeAlign(STy);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
- !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ !Subtarget.hasP8Vector() &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
+ VT == MVT::v4f32)) ||
(Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
- LD->getAlignment() >= ScalarABIAlignment)) &&
- LD->getAlignment() < ABIAlignment) {
+ LD->getAlign() >= ScalarABIAlignment)) &&
+ LD->getAlign() < ABIAlignment) {
// This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
@@ -14492,6 +15588,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -14898,18 +15995,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
- bool IsDarwinABI = Subtarget.isDarwinABI();
bool is64Bit = isPPC64 && VT == LLT::scalar(64);
if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
Register Reg = StringSwitch<Register>(RegName)
- .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
- .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() :
- (is64Bit ? PPC::X13 : PPC::R13))
- .Default(Register());
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isPPC64 ? Register() : PPC::R2)
+ .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(Register());
if (Reg)
return Reg;
@@ -15002,7 +16097,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -15036,7 +16131,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
}
@@ -15088,7 +16183,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -15121,7 +16216,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.size = VT.getStoreSize();
- Info.align = Align::None();
+ Info.align = Align(1);
Info.flags = MachineMemOperand::MOStore;
return true;
}
@@ -15132,35 +16227,24 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
-/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. If DstAlign is zero that means it's safe to destination
-/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If 'IsMemset' is
-/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
- uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const {
+ const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
// When expanding a memset, require at least two QPX instructions to cover
// the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
- (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ if (Subtarget.hasQPX() && Op.size() >= 32 &&
+ (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
return MVT::v4f64;
}
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
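+ // E.g. a 32-byte memcpy with 16-byte-aligned source and destination on
+ // an Altivec subtarget can be expanded as two v4i32 load/store pairs.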
- if (Subtarget.hasAltivec() && Size >= 16 &&
- (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
- ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ if (Subtarget.hasAltivec() && Op.size() >= 16 &&
+ (Op.isAligned(Align(16)) ||
+ ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
return MVT::v4i32;
}
@@ -15251,7 +16335,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
@@ -15275,22 +16360,48 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
- VT = VT.getScalarType();
-
- if (!VT.isSimple())
- return false;
+ return isFMAFasterThanFMulAndFAdd(
+ MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
+}
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::f32:
- case MVT::f64:
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+ Type *Ty) const {
+ switch (Ty->getScalarType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
return true;
- case MVT::f128:
- return (EnableQuadPrecision && Subtarget.hasP9Vector());
+ case Type::FP128TyID:
+ return Subtarget.hasP9Vector();
default:
- break;
+ return false;
}
+}
- return false;
+// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
+// FIXME: add more patterns which are profitable to hoist.
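+// E.g. given "%m = fmul double %a, %b" with a single user
+// "%s = fadd double %m, %c", keeping %m next to %s lets them fuse into an
+// FMADD, so hoisting %m is not profitable.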
+bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (!I->hasOneUse())
+ return true;
+
+ Instruction *User = I->user_back();
+ assert(User && "A single use instruction with no uses.");
+
+ if (User->getOpcode() != Instruction::FSub &&
+ User->getOpcode() != Instruction::FAdd)
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !(
+ isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+ isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}
const MCPhysReg *
@@ -15306,12 +16417,12 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
-unsigned PPCTargetLowering::getExceptionPointerRegister(
+Register PPCTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}
-unsigned PPCTargetLowering::getExceptionSelectorRegister(
+Register PPCTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
@@ -15342,58 +16453,83 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
return PPC::createFastISel(FuncInfo, LibInfo);
}
-void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
- if (Subtarget.isDarwinABI()) return;
- if (!Subtarget.isPPC64()) return;
-
- // Update IsSplitCSR in PPCFunctionInfo
- PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
- PFI->setIsSplitCSR(true);
+// 'Inverted' means the FMA opcode after negating one multiplicand.
+// For example, (fma -a b c) = (fnmsub a b c)
+static unsigned invertFMAOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Invalid FMA opcode for PowerPC!");
+ case ISD::FMA:
+ return PPCISD::FNMSUB;
+ case PPCISD::FNMSUB:
+ return ISD::FMA;
+ }
}
-void PPCTargetLowering::insertCopiesSplitCSR(
- MachineBasicBlock *Entry,
- const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
- if (!IStart)
- return;
+SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const {
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
- MachineBasicBlock::iterator MBBI = Entry->begin();
- for (const MCPhysReg *I = IStart; *I; ++I) {
- const TargetRegisterClass *RC = nullptr;
- if (PPC::G8RCRegClass.contains(*I))
- RC = &PPC::G8RCRegClass;
- else if (PPC::F8RCRegClass.contains(*I))
- RC = &PPC::F8RCRegClass;
- else if (PPC::CRRCRegClass.contains(*I))
- RC = &PPC::CRRCRegClass;
- else if (PPC::VRRCRegClass.contains(*I))
- RC = &PPC::VRRCRegClass;
- else
- llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+
+ switch (Opc) {
+ case PPCISD::FNMSUB:
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (!Op.hasOneUse() || !isTypeLegal(VT) || Subtarget.hasQPX())
+ break;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDLoc Loc(Op);
+
+ NegatibleCost N2Cost = NegatibleCost::Expensive;
+ SDValue NegN2 =
+ getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
+
+ if (!NegN2)
+ return SDValue();
- Register NewVR = MRI->createVirtualRegister(RC);
- // Create copy from CSR to a virtual register.
- // FIXME: this currently does not emit CFI pseudo-instructions, it works
- // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
- // nounwind. If we want to generalize this later, we may need to emit
- // CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction().hasFnAttribute(
- Attribute::NoUnwind) &&
- "Function should be nounwind in insertCopiesSplitCSR!");
- Entry->addLiveIn(*I);
- BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
- .addReg(*I);
+ // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
+ // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
+ // These transformations may change sign of zeroes. For example,
+ // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
+ if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
+ // Try and choose the cheaper one to negate.
+ NegatibleCost N0Cost = NegatibleCost::Expensive;
+ SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
+ N0Cost, Depth + 1);
+
+ NegatibleCost N1Cost = NegatibleCost::Expensive;
+ SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
+ N1Cost, Depth + 1);
+
+ if (NegN0 && N0Cost <= N1Cost) {
+ Cost = std::min(N0Cost, N2Cost);
+ return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
+ } else if (NegN1) {
+ Cost = std::min(N1Cost, N2Cost);
+ return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
+ }
+ }
+
+ // (fneg (fnmsub a b c)) => (fma a b (fneg c))
+ if (isOperationLegal(ISD::FMA, VT)) {
+ Cost = N2Cost;
+ return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
+ }
- // Insert the copy-back instructions right before the terminator.
- for (auto *Exit : Exits)
- BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), *I)
- .addReg(NewVR);
+ break;
}
+
+ return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
+ Cost, Depth);
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
@@ -15421,6 +16557,13 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
case MVT::f32:
case MVT::f64:
+ if (Subtarget.hasPrefixInstrs()) {
+ // With prefixed instructions, we can materialize anything that can be
+ // represented with a 32-bit immediate, not just positive zero.
+ APFloat APFloatOfImm = Imm;
+ return convertToNonDenormSingle(APFloatOfImm);
+ }
+ LLVM_FALLTHROUGH;
case MVT::ppcf128:
return Imm.isPosZero();
}
@@ -15591,10 +16734,59 @@ static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Transform
+// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
+// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
+// In this case both C1 and C2 must be known constants, and C1+C2 must fit
+// into a signed 34-bit integer.
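+// E.g. (add (MAT_PCREL_ADDR @g+4), 8) folds to (MAT_PCREL_ADDR @g+12),
+// provided the new offset 12 fits in a signed 34-bit immediate (it does).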
+static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ if (!Subtarget.isUsingPCRelativeCalls())
+ return SDValue();
+
+ // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
+ // If we find that node try to cast the Global Address and the Constant.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
+ std::swap(LHS, RHS);
+
+ if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
+ return SDValue();
+
+ // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
+ GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
+ ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
+
+ // Check that both casts succeeded.
+ if (!GSDN || !ConstNode)
+ return SDValue();
+
+ int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
+ SDLoc DL(GSDN);
+
+ // The signed int offset needs to fit in 34 bits.
+ if (!isInt<34>(NewOffset))
+ return SDValue();
+
+ // The new global address is a copy of the old global address except
+ // that it has the updated Offset.
+ SDValue GA =
+ DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
+ NewOffset, GSDN->getTargetFlags());
+ SDValue MatPCRel =
+ DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
+ return MatPCRel;
+}
+
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
return Value;
+ if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
+ return Value;
+
return SDValue();
}
@@ -15619,6 +16811,24 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
+ // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
+ if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+ SDValue Sub = Op0.getOperand(0);
+ if (Sub.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = Sub.getOperand(0);
+ SDValue SubOp1 = Sub.getOperand(1);
+ if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
+ (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
+ return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
+ SubOp1.getOperand(0),
+ DCI.DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+ }
+ }
+
// Looking for a truncate of i128 to i64.
if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
return SDValue();
@@ -15673,6 +16883,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR10:
case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
@@ -15734,6 +16945,44 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
}
}
+// Combine an FMA-like op (such as FNMSUB) with FNEGs into the appropriate
+// op. Do this in the combiner since we need to check SDNode flags and other
+// subtarget features.
+SDValue PPCTargetLowering::combineFMALike(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDNodeFlags Flags = N->getFlags();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetOptions &Options = getTargetMachine().Options;
+ unsigned Opc = N->getOpcode();
+ bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ bool LegalOps = !DCI.isBeforeLegalizeOps();
+ SDLoc Loc(N);
+
+ // TODO: QPX subtarget is deprecated. No transformation here.
+ if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
+ return SDValue();
+
+ // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
+ // since (fnmsub a b c)=-0 while c-ab=+0.
+ if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // (fma (fneg a) b c) => (fnmsub a b c)
+ // (fnmsub (fneg a) b c) => (fma a b c)
+ if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
+ return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
+
+ // (fma a (fneg b) c) => (fnmsub a b c)
+ // (fnmsub a (fneg b) c) => (fma a b c)
+ if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
+ return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
+
+ return SDValue();
+}
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.is64BitELFABI())
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e0c381827b873..768eaa43e0135 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -85,19 +85,10 @@ namespace llvm {
/// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
VEXTS,
- /// SExtVElems, takes an input vector of a smaller type and sign
- /// extends to an output vector of a larger type.
- SExtVElems,
-
/// Reciprocal estimate instructions (unary FP ops).
FRE,
FRSQRTE,
- // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
- // three v4f32 operands and producing a v4f32 result.
- VMADDFP,
- VNMSUBFP,
-
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
@@ -106,6 +97,15 @@ namespace llvm {
///
XXSPLT,
+ /// XXSPLTI_SP_TO_DP - The PPC VSX splat instruction for immediates,
+ /// converting an immediate single-precision number to a double-precision
+ /// vector or scalar.
+ XXSPLTI_SP_TO_DP,
+
+ /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
+ ///
+ XXSPLTI32DX,
+
/// VECINSERT - The PPC vector insert instruction
///
VECINSERT,
@@ -142,6 +142,10 @@ namespace llvm {
/// dynamic alloca.
DYNAREAOFFSET,
+ /// To avoid stack clash, allocation is performed in blocks and each block
+ /// is probed.
+ PROBED_ALLOCA,
+
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
/// at function entry, used for PIC code.
GlobalBaseReg,
@@ -157,6 +161,9 @@ namespace llvm {
SRA,
SHL,
+ /// FNMSUB - Negated multiply-subtract instruction.
+ FNMSUB,
+
/// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
/// word and shift left immediate.
EXTSWSLI,
@@ -169,9 +176,11 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls and 32-bit/64-bit AIX calls.
+ /// CALL_NOTOC is a direct call where the caller does not use the TOC.
CALL,
CALL_NOP,
+ CALL_NOTOC,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -225,6 +234,14 @@ namespace llvm {
/// As with SINT_VEC_TO_FP, used for converting illegal types.
UINT_VEC_TO_FP,
+ /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
+ /// place the value into the least significant element of the most
+ /// significant doubleword in the vector. This is not element zero for
+ /// anything smaller than a doubleword on either endianness. This node has
+ /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
+ /// the aforementioned location in the vector register.
+ SCALAR_TO_VECTOR_PERMUTED,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -430,6 +447,11 @@ namespace llvm {
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
+ /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
+ /// either through an add like PADDI or through a PC Relative load like
+ /// PLD.
+ MAT_PCREL_ADDR,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -637,7 +659,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (VT.getScalarSizeInBits() % 8 == 0)
+ if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -676,17 +698,9 @@ namespace llvm {
return VT.isScalarInteger();
}
- bool supportSplitCSR(MachineFunction *MF) const override {
- return
- MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
- MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }
-
- void initializeSplitCSR(MachineBasicBlock *Entry) const override;
-
- void insertCopiesSplitCSR(
- MachineBasicBlock *Entry,
- const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
+ bool OptForSize, NegatibleCost &Cost,
+ unsigned Depth = 0) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -716,7 +730,7 @@ namespace llvm {
/// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG,
- unsigned EncodingAlignment = 0) const;
+ MaybeAlign EncodingAlignment = None) const;
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
@@ -725,13 +739,17 @@ namespace llvm {
/// requirement, i.e. multiples of 4 for DS form.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
- unsigned EncodingAlignment) const;
+ MaybeAlign EncodingAlignment) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
+ /// SelectAddressPCRel - Returns true if the address N can be represented
+ /// as a PC-relative address, i.e. [pc+imm].
+ bool SelectAddressPCRel(SDValue N, SDValue &Base) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -794,6 +812,13 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
+
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
+
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
@@ -879,7 +904,7 @@ namespace llvm {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
- return true;
+ return true;
}
// Returns true if the address of the global is stored in TOC entry.
@@ -892,21 +917,10 @@ namespace llvm {
MachineFunction &MF,
unsigned Intrinsic) const override;
- /// getOptimalMemOpType - Returns the target specific optimal type for load
- /// and store operations as a result of memset, memcpy, and memmove
- /// lowering. If DstAlign is zero that means it's safe to destination
- /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
- /// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If 'IsMemset' is
- /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
- /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
- /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
@@ -922,6 +936,14 @@ namespace llvm {
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
+
+ /// isProfitableToHoist - Check if it is profitable to hoist instruction
+ /// \p I to its dominator block.
+ /// For example, it is not profitable if \p I and its only user can form an
+ /// FMA instruction, because PowerPC prefers FMADD.
+ bool isProfitableToHoist(Instruction *I) const override;
+
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
// Should we expand the build vector with shuffles?
@@ -950,14 +972,19 @@ namespace llvm {
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a
+ /// specific type is cheaper than a multiply followed by a shift.
+ /// This is true for words and doublewords on 64-bit PowerPC.
+ bool isMulhCheaperThanMulShift(EVT Type) const override;
+
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
@@ -973,6 +1000,24 @@ namespace llvm {
unsigned JTI,
MCContext &Ctx) const override;
+ /// Structure that collects some common arguments that get passed around
+ /// between the functions for call lowering.
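+ /// For example, a plain direct C call would be described as
+ /// CallFlags(CallingConv::C, /*IsTailCall=*/false, /*IsVarArg=*/false,
+ ///           /*IsPatchPoint=*/false, /*IsIndirect=*/false,
+ ///           /*HasNest=*/false, /*NoMerge=*/false).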
+ struct CallFlags {
+ const CallingConv::ID CallConv;
+ const bool IsTailCall : 1;
+ const bool IsVarArg : 1;
+ const bool IsPatchPoint : 1;
+ const bool IsIndirect : 1;
+ const bool HasNest : 1;
+ const bool NoMerge : 1;
+
+ CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
+ bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
+ : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
+ IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
+ HasNest(HasNest), NoMerge(NoMerge) {}
+ };
+
private:
struct ReuseLoadInfo {
SDValue Ptr;
@@ -981,7 +1026,7 @@ namespace llvm {
MachinePointerInfo MPI;
bool IsDereferenceable = false;
bool IsInvariant = false;
- unsigned Alignment = 0;
+ Align Alignment;
AAMDNodes AAInfo;
const MDNode *Ranges = nullptr;
@@ -1032,15 +1077,10 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- bool
- IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee,
- CallingConv::ID CalleeCC,
- ImmutableCallSite CS,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB,
+ bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
SDValue Chain, SDValue &LROpOut,
@@ -1083,7 +1123,6 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -1091,6 +1130,7 @@ namespace llvm {
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1100,15 +1140,14 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl,
- bool isTailCall, bool isVarArg, bool isPatchPoint,
- bool hasNest, SelectionDAG &DAG,
+
+ SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee, int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
+ const CallBase *CB) const;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -1155,42 +1194,34 @@ namespace llvm {
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
const SDLoc &dl) const;
- SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
- SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall, bool isPatchPoint,
+ const CallBase *CB) const;
+ SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
- ImmutableCallSite CS) const;
+ const CallBase *CB) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1206,10 +1237,13 @@ namespace llvm {
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const;
@@ -1240,6 +1274,10 @@ namespace llvm {
/// essentially v16i8 vector version of VINSERTH.
SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+ /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
+ /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
+ SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
// Return whether the call instruction can potentially be optimized to a
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
@@ -1258,6 +1296,9 @@ namespace llvm {
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
+ bool convertToNonDenormSingle(APInt &ArgAPInt);
+ bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+
} // end namespace llvm
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 43431a1e00698..1c457d4170d54 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins abscalltarget:$func),
"bla $func\n\tnop", IIC_BrB,
[(PPCcall_nop (i64 imm:$func))]>;
+ let Predicates = [PCRelativeMemops] in {
+ // BL8_NOTOC means that the caller does not use the TOC pointer, and that
+ // if it does use R2, it is merely a caller-saved register. Therefore it is
+ // safe to emit only the bl and not the nop for this instruction. The
+ // linker will not try to restore R2 after the call.
+ def BL8_NOTOC : IForm<18, 0, 1, (outs),
+ (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>;
+ }
}
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
@@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
+ (BL8_NOTOC tglobaladdr:$dst)>;
+def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
+ (BL8_NOTOC texternalsym:$dst)>;
+
// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
@@ -411,6 +425,19 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
[(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
+// Probed alloca to support stack clash protection.
+let Defs = [X1], Uses = [X1], hasNoSchedulingInfo = 1 in {
+def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
+ (ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
+ [(set i64:$result,
+ (PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
+def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
+ g8rc:$sp),
+ (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_64", []>;
+}
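Conceptually, stack-clash probing touches every page between the old and the new stack pointer so that a large dynamic allocation cannot jump over the guard page. A hand-written sketch of that idea (not the code these pseudos actually expand to):

#include <cstddef>
// Touch one byte per page while growing the stack downward, so the guard
// page is always hit before anything beyond it is addressed.
char *probed_alloca(char *sp, std::size_t size, std::size_t page_size) {
  for (std::size_t off = page_size; off <= size; off += page_size)
    sp[-(std::ptrdiff_t)off] = 0; // probe each page in turn
  return sp - size;
}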
let hasSideEffects = 0 in {
let Defs = [LR8] in {
@@ -772,8 +799,9 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS),
- "popcntb $rA, $rS", IIC_IntGeneral, []>;
+def POPCNTB : XForm_11<31, 122, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntb $rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_popcntb i64:$rS))]>;
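Per the ISA, popcntb computes the population count of each byte independently and places it in the corresponding result byte. A scalar C++ sketch of that semantics (function name invented; __builtin_popcountll is a GCC/Clang builtin):

#include <cstdint>
// Each result byte holds the number of set bits (0..8) of the source byte.
std::uint64_t popcntb(std::uint64_t rs) {
  std::uint64_t ra = 0;
  for (int i = 0; i < 64; i += 8)
    ra |= (std::uint64_t)__builtin_popcountll((rs >> i) & 0xFF) << i;
  return ra;
}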
defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divd", "$rT, $rA, $rB", IIC_IntDivD,
@@ -909,6 +937,104 @@ def ISEL8 : AForm_4<31, 15,
} // hasSideEffects = 0
} // End FXU Operations.
+def : InstAlias<"li $rD, $imm", (ADDI8 g8rc:$rD, ZERO8, s16imm64:$imm)>;
+def : InstAlias<"lis $rD, $imm", (ADDIS8 g8rc:$rD, ZERO8, s17imm64:$imm)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+
+def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM8 g8rc:$rA, g8rc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM8_rec g8rc:$rA, g8rc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM8 g8rc:$rA, g8rc:$rS, g8rc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNM8_rec g8rc:$rA, g8rc:$rS, g8rc:$rB, 0, 31)>;
+def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM8 g8rc:$rA, g8rc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINM8_rec g8rc:$rA, g8rc:$rS, 0, u5imm:$n, 31)>;
+
+def : InstAlias<"isellt $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0LT)>;
+def : InstAlias<"iselgt $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0GT)>;
+def : InstAlias<"iseleq $rT, $rA, $rB",
+ (ISEL8 g8rc:$rT, g8rc_nox0:$rA, g8rc:$rB, CR0EQ)>;
+
+def : InstAlias<"nop", (ORI8 X0, X0, 0)>;
+def : InstAlias<"xnop", (XORI8 X0, X0, 0)>;
+
+def : InstAlias<"cntlzw $rA, $rS", (CNTLZW8 g8rc:$rA, g8rc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW8_rec g8rc:$rA, g8rc:$rS)>;
+
+def : InstAlias<"mtxer $Rx", (MTSPR8 1, g8rc:$Rx)>;
+def : InstAlias<"mfxer $Rx", (MFSPR8 g8rc:$Rx, 1)>;
+
+def : InstAlias<"mtudscr $Rx", (MTSPR8 3, g8rc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFSPR8 g8rc:$Rx, 3)>;
+
+def : InstAlias<"mfrtcu $Rx", (MFSPR8 g8rc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR8 g8rc:$Rx, 5)>;
+
+def : InstAlias<"mtlr $Rx", (MTSPR8 8, g8rc:$Rx)>;
+def : InstAlias<"mflr $Rx", (MFSPR8 g8rc:$Rx, 8)>;
+
+def : InstAlias<"mtctr $Rx", (MTSPR8 9, g8rc:$Rx)>;
+def : InstAlias<"mfctr $Rx", (MFSPR8 g8rc:$Rx, 9)>;
+
+def : InstAlias<"mtuamr $Rx", (MTSPR8 13, g8rc:$Rx)>;
+def : InstAlias<"mfuamr $Rx", (MFSPR8 g8rc:$Rx, 13)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR8 17, g8rc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR8 g8rc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR8 18, g8rc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR8 g8rc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR8 19, g8rc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR8 g8rc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR8 22, g8rc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR8 g8rc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR8 25, g8rc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR8 g8rc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR8 26, g8rc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR8 g8rc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR8 27, g8rc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR8 g8rc:$Rx, 27)>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR8 28, g8rc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR8 g8rc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR8 29, g8rc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR8 g8rc:$Rx, 29)>;
+
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
+}
+
+def : InstAlias<"mfasr $RT", (MFSPR8 g8rc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR8 280, g8rc:$RT)>;
+
+def : InstAlias<"mttbl $Rx", (MTSPR8 284, g8rc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR8 285, g8rc:$Rx)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR8 g8rc:$RT, 287)>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR8 g8rc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR8 512, g8rc:$Rx)>;
//===----------------------------------------------------------------------===//
// Load/Store instructions.
@@ -1110,7 +1236,7 @@ def ADDISgotTprelHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: PPCEmitTimePseudo<(outs g8rc_nox0:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f796..920eeed9d41ff 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -341,7 +341,7 @@ class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
+def HasAltivec : Predicate<"Subtarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
@@ -491,7 +491,7 @@ let isCommutable = 1 in {
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddfp $vD, $vA, $vB", IIC_VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
-
+
def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddubm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
@@ -501,7 +501,7 @@ def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vadduwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-
+
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
@@ -635,7 +635,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
v4i32, v8i16>;
} // isCommutable
-
+
def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
@@ -657,7 +657,7 @@ def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-
+
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
@@ -869,6 +869,26 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+// Multiply
+def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
+
+// Add
+def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
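Both patterns rely on vmladduhm computing a*b+c modulo 2^16 in every halfword lane, so a zero addend yields a plain multiply. One lane, sketched in scalar C++:

#include <cstdint>
// One halfword lane of vmladduhm: multiply-low then add, wrapping mod 2^16.
std::uint16_t vmladduhm_lane(std::uint16_t a, std::uint16_t b, std::uint16_t c) {
  return (std::uint16_t)((std::uint32_t)a * b + c);
}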
+
+// Saturating adds/subtracts.
+def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
+def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
+def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
+def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
+def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
+def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
+def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
+def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
+def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
+def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
+def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
+def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
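For reference, one byte lane of the unsigned saturating add that VADDUBS implements; the signed and subtracting variants clamp analogously:

#include <cstdint>
// One lane of vaddubs: the sum clamps at 0xFF instead of wrapping.
std::uint8_t uaddsat_lane(std::uint8_t a, std::uint8_t b) {
  unsigned s = (unsigned)a + b;
  return s > 0xFF ? 0xFF : (std::uint8_t)s;
}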
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -1002,14 +1022,9 @@ def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
def : Pat<(fmul v4f32:$vA, v4f32:$vB),
(VMADDFP $vA, $vB,
- (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
+ (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
-// Fused multiply add and multiply sub for packed float. These are represented
-// separately from the real instructions above, for operations that must have
-// the additional precision, such as Newton-Raphson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
- (VMADDFP $A, $B, $C)>;
-def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
(VNMSUBFP $A, $B, $C)>;
def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
@@ -1121,8 +1136,8 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
} // end HasAltivec
-def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
-def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
let isCommutable = 1 in {
@@ -1143,7 +1158,7 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
-// Vector merge
+// Vector merge
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrgew $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
@@ -1251,16 +1266,16 @@ def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
[(set v2i64:$vD, (ctpop v2i64:$vB))]>;
let isCommutable = 1 in {
-// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
// VSX equivalents. We need to fix this up at some point. Two possible
// solutions for this problem:
// 1. Disable Altivec patterns that compete with VSX patterns using the
-// !HasVSX predicate. This essentially favours VSX over Altivec, in
-// hopes of reducing register pressure (larger register set using VSX
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
// instructions than VMX instructions)
// 2. Employ a more disciplined use of AddedComplexity, which would provide
// more fine-grained control than option 1. This would be beneficial
-// if we find situations where Altivec is really preferred over VSX.
+// if we find situations where Altivec is really preferred over VSX.
def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"veqv $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
@@ -1339,9 +1354,13 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
} // HasP8Crypto
// The following altivec instructions were introduced in Power ISA 3.0
-def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
+def HasP9Altivec : Predicate<"Subtarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
+// Vector Multiply-Sum
+def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
+ v1i128, v2i64, v1i128>;
+
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 115bd44ea202e..632d4d9deb8a2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -39,7 +39,11 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
// Indicate that this instruction is of type X-Form Load or Store
bits<1> XFormMemOp = 0;
- let TSFlags{7} = XFormMemOp;
+ let TSFlags{6} = XFormMemOp;
+
+ // Indicate that this instruction is prefixed.
+ bits<1> Prefixed = 0;
+ let TSFlags{7} = Prefixed;
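On the C++ side this corresponds to testing bit 7 of the instruction's TSFlags (with XFormMemOp moved down to bit 6 to make room). A raw-bit sketch for illustration only; real code would go through a named accessor:

#include "llvm/MC/MCInstrDesc.h"
// Illustration: read the Prefixed property straight out of TSFlags bit 7.
static bool isPrefixedInst(const llvm::MCInstrDesc &Desc) {
  return (Desc.TSFlags >> 7) & 1;
}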
// Fields used for relation models.
string BaseName = "";
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 6cbf999ca73dc..992ad8216f3bd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -13,7 +13,7 @@
-def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+def HasHTM : Predicate<"Subtarget->hasHTM()">;
def HTM_get_imm : SDNodeXForm<imm, [{
return getI32Imm (N->getZExtValue(), SDLoc(N));
@@ -169,3 +169,8 @@ def : Pat<(i64 (int_ppc_ttest)),
36, 28)>;
} // [HasHTM]
+
+def : InstAlias<"tend.", (TEND 0)>, Requires<[HasHTM]>;
+def : InstAlias<"tendall.", (TEND 1)>, Requires<[HasHTM]>;
+def : InstAlias<"tsuspend.", (TSR 0)>, Requires<[HasHTM]>;
+def : InstAlias<"tresume.", (TSR 1)>, Requires<[HasHTM]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 30906a32b00c5..11c97210ead9b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -72,27 +73,6 @@ static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
cl::desc("Use the old (incorrect) instruction latency calculation"));
-// Index into the OpcodesForSpill array.
-enum SpillOpcodeKey {
- SOK_Int4Spill,
- SOK_Int8Spill,
- SOK_Float8Spill,
- SOK_Float4Spill,
- SOK_CRSpill,
- SOK_CRBitSpill,
- SOK_VRVectorSpill,
- SOK_VSXVectorSpill,
- SOK_VectorFloat8Spill,
- SOK_VectorFloat4Spill,
- SOK_VRSaveSpill,
- SOK_QuadFloat8Spill,
- SOK_QuadFloat4Spill,
- SOK_QuadBitSpill,
- SOK_SpillToVSR,
- SOK_SPESpill,
- SOK_LastOpcodeSpill // This must be last on the enum.
-};
-
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
@@ -225,13 +205,42 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+ MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
+ // Propagate FP flags from the original instructions.
+ // But clear poison-generating flags because those may not be valid now.
+ uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ NewMI1.setFlags(IntersectedFlags);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);
+
+ NewMI2.setFlags(IntersectedFlags);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
+}
+
+void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
+ uint16_t Flags) const {
+ MI.setFlags(Flags);
+ MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
+ MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
+ MI.clearFlag(MachineInstr::MIFlag::IsExact);
+}
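Why the wrap flags cannot survive reassociation: with a = INT_MAX, b = 1, c = -1, the tree (a + b) + c overflows on the inner add, while a + (b + c) computes INT_MAX + 0 and never does, so an nsw proven for one association says nothing about the other. A worked helper:

#include <climits>
// True iff a + b overflows in signed arithmetic; demonstrates that
// reassociation can change where (and whether) overflow occurs.
static bool signedAddOverflows(int a, int b) {
  return (b > 0 && a > INT_MAX - b) || (b < 0 && a < INT_MIN - b);
}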
+
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
-// because they have high latencies (compared to other operations, such and
+// because they have high latencies (>= 5 cycles, compared to other operations, such as
// and/or, which are also associative and commutative, but have low latencies).
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
+ // Floating point:
// FP Add:
case PPC::FADD:
case PPC::FADDS:
@@ -258,12 +267,157 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case PPC::QVFMUL:
case PPC::QVFMULS:
case PPC::QVFMULSs:
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
+ // Fixed point:
+ // Multiply:
+ case PPC::MULHD:
+ case PPC::MULLD:
+ case PPC::MULHW:
+ case PPC::MULLW:
return true;
default:
return false;
}
}
+#define InfoArrayIdxFMAInst 0
+#define InfoArrayIdxFAddInst 1
+#define InfoArrayIdxFMULInst 2
+#define InfoArrayIdxAddOpIdx 3
+#define InfoArrayIdxMULOpIdx 4
+// This array holds info for FMA instructions:
+// Index 0 (InfoArrayIdxFMAInst): the FMA instruction;
+// Index 1 (InfoArrayIdxFAddInst): the ADD instruction associated with the FMA;
+// Index 2 (InfoArrayIdxFMULInst): the MUL instruction associated with the FMA;
+// Index 3 (InfoArrayIdxAddOpIdx): index of the ADD operand in the FMA operands;
+// Index 4 (InfoArrayIdxMULOpIdx): index of the first MUL operand in the FMA
+// operands; the second MUL operand index is this value plus 1.
+static const uint16_t FMAOpIdxInfo[][5] = {
+ // FIXME: Add more FMA instructions like XSNMADDADP and so on.
+ {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
+ {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
+ {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
+ {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
+ {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+ {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
+ {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+
+// Check if an opcode is an FMA instruction. If it is, return the index in the
+// FMAOpIdxInfo array. Otherwise, return -1.
+int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
+ for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
+ if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
+ return I;
+ return -1;
+}
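A hypothetical lookup, to make the column meanings concrete (using the XSMADDADP row above):

// Sketch: for XSMADDADP (row 0), the addend is operand 1 and the two
// multiplicands are operands 2 and 3.
int16_t Idx = getFMAOpIdxInfo(PPC::XSMADDADP);              // returns 0
unsigned AddIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];  // 1
unsigned MulIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];  // 2; second is 3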
+
+// Try to reassociate FMA chains like below:
+//
+// Pattern 1:
+// A = FADD X, Y (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMA X, M21, M22
+// B = FMA Y, M31, M32
+// C = FADD A, B
+//
+// Pattern 2:
+// A = FMA X, M11, M12 (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMUL M11, M12
+// B = FMA X, M21, M22
+// D = FMA A, M31, M32
+// C = FADD B, D
+//
+// This breaks the dependency chain through A and B, allowing the FMAs to be
+// executed in parallel (or back-to-back in a pipeline) instead of
+// serializing on each other.
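A scalar analogue of Pattern 1 (illustration only; the two forms agree only under reassociation, which is exactly why the code below insists on the FmReassoc and FmNsz flags):

#include <cmath>
float pattern1_before(float X, float Y, float M21, float M22,
                      float M31, float M32) {
  float A = X + Y;                  // Leaf
  float B = std::fma(M21, M22, A);  // Prev: must wait for A
  return std::fma(M31, M32, B);     // Root: must wait for B
}
float pattern1_after(float X, float Y, float M21, float M22,
                     float M31, float M32) {
  float A = std::fma(M21, M22, X);  // independent
  float B = std::fma(M31, M32, Y);  // independent
  return A + B;                     // only the final add serializes
}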
+bool PPCInstrInfo::getFMAPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineBasicBlock *MBB = Root.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
+ for (const auto &MO : Instr.explicit_operands())
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ return false;
+ return true;
+ };
+
+ auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+ bool IsLeaf, bool IsAdd) {
+ int16_t Idx = -1;
+ if (!IsAdd) {
+ Idx = getFMAOpIdxInfo(Instr.getOpcode());
+ if (Idx < 0)
+ return false;
+ } else if (Instr.getOpcode() !=
+ FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
+ [InfoArrayIdxFAddInst])
+ return false;
+
+ // The instruction can only be reassociated if its fast-math flags allow
+ // it: both FmReassoc and FmNsz must be present.
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+ return false;
+
+ // All of the instruction's operands must be virtual registers for
+ // reassociation.
+ if (!IsAllOpsVirtualReg(Instr))
+ return false;
+
+ if (IsAdd && IsLeaf)
+ return true;
+
+ AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+
+ const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
+ MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+ // If the 'add' operand's def is not in the current block, don't do this
+ // ILP-related optimization.
+ if (!MIAdd || MIAdd->getParent() != MBB)
+ return false;
+
+ // If this is not a leaf FMA instruction, its 'add' operand should have only
+ // one use, as this FMA will be changed later.
+ return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+ };
+
+ int16_t AddOpIdx = -1;
+ // Root must be a valid FMA like instruction.
+ if (!IsReassociable(Root, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "Add operand index is invalid!");
+
+ Register RegB = Root.getOperand(AddOpIdx).getReg();
+ MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+
+ // Prev must be a valid FMA like instruction.
+ AddOpIdx = -1;
+ if (!IsReassociable(*Prev, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "Add operand index is invalid!");
+
+ Register RegA = Prev->getOperand(AddOpIdx).getReg();
+ MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+ AddOpIdx = -1;
+ if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ return true;
+ }
+ if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ return true;
+ }
+ return false;
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
@@ -272,16 +426,201 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
- // FP reassociation is only legal when we don't need strict IEEE semantics.
- if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
- return false;
+ if (getFMAPatterns(Root, Patterns))
+ return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
+void PPCInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+ break;
+ default:
+ // Reassociate default patterns.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ break;
+ }
+}
+
+// Currently, only handle two patterns REASSOC_XY_AMM_BMM and
+// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns.
+void PPCInstrInfo::reassociateFMA(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineOperand &OpC = Root.getOperand(0);
+ Register RegC = OpC.getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(RegC);
+ MRI.constrainRegClass(RegC, RC);
+
+ unsigned FmaOp = Root.getOpcode();
+ int16_t Idx = getFMAOpIdxInfo(FmaOp);
+ assert(Idx >= 0 && "Root must be a FMA instruction");
+
+ uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+ uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+ MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+ MachineInstr *Leaf =
+ MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+ uint16_t IntersectedFlags =
+ Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+ auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
+ bool &KillFlag) {
+ Reg = Operand.getReg();
+ MRI.constrainRegClass(Reg, RC);
+ KillFlag = Operand.isKill();
+ };
+
+ auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
+ Register &MulOp2, bool &MulOp1KillFlag,
+ bool &MulOp2KillFlag) {
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+ };
+
+ Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+ bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
+ KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+
+ GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
+ GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+ GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+ GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+ }
+
+ // Create new virtual registers for the new results instead of
+ // recycling legacy ones because the MachineCombiner's computation of the
+ // critical path requires a new register definition rather than an existing
+ // one.
+ Register NewVRA = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
+
+ Register NewVRB = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+
+ Register NewVRD = 0;
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ NewVRD = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
+ }
+
+ auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
+ Register RegMul1, bool KillRegMul1,
+ Register RegMul2, bool KillRegMul2) {
+ MI->getOperand(AddOpIdx).setReg(RegAdd);
+ MI->getOperand(AddOpIdx).setIsKill(KillAdd);
+ MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
+ MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
+ MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
+ MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
+ };
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+ .addReg(RegY, getKillRegState(KillY))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRA, getKillRegState(true));
+
+ // Update flags for newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewC);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ assert(NewVRD && "new FMA register not created!");
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Leaf->getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
+ .addReg(RegM11, getKillRegState(KillM11))
+ .addReg(RegM12, getKillRegState(KillM12));
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewD =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
+ .addReg(NewVRA, getKillRegState(true))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
+ KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRD, getKillRegState(true));
+
+ // Update flags for newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewD, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewD);
+ InsInstrs.push_back(MINewC);
+ }
+
+ assert(!InsInstrs.empty() &&
+ "Insertion instructions set should not be empty!");
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(Leaf);
+ DelInstrs.push_back(Prev);
+ DelInstrs.push_back(&Root);
+}
+
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: return false;
@@ -753,9 +1092,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ ArrayRef<MachineOperand> Cond,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
if (Cond.size() != 2)
return false;
@@ -791,9 +1131,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const DebugLoc &dl, unsigned DestReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const {
+ const DebugLoc &dl, Register DestReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const {
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
@@ -852,7 +1192,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
- unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
+ Register FirstReg = SwapOps ? FalseReg : TrueReg,
SecondReg = SwapOps ? TrueReg : FalseReg;
// The first input register of isel cannot be r0. If it is a member
@@ -863,7 +1203,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
const TargetRegisterClass *FirstRC =
MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
- unsigned OldFirstReg = FirstReg;
+ Register OldFirstReg = FirstReg;
FirstReg = MRI.createVirtualRegister(FirstRC);
BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
.addReg(OldFirstReg);
@@ -1024,183 +1364,66 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
-unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC)
- const {
- const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray();
+static unsigned getSpillIndex(const TargetRegisterClass *RC) {
int OpcodeIndex = 0;
- if (RC != nullptr) {
- if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
- PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
- PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Int4Spill;
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Int8Spill;
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Float8Spill;
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_CRSpill;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_CRBitSpill;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VRVectorSpill;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VSXVectorSpill;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VectorFloat8Spill;
+ } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VectorFloat4Spill;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_VRSaveSpill;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat8Spill;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadFloat4Spill;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_QuadBitSpill;
+ } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_SpillToVSR;
} else {
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::GPRC_NOR0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.contains(Reg) ||
- PPC::G8RC_NOX0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
+ llvm_unreachable("Unknown regclass!");
}
- return OpcodesForSpill[OpcodeIndex];
+ return OpcodeIndex;
}
unsigned
-PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC) const {
- const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray();
- int OpcodeIndex = 0;
+PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {
+ const unsigned *OpcodesForSpill = getStoreOpcodesForSpillArray();
+ return OpcodesForSpill[getSpillIndex(RC)];
+}
- if (RC != nullptr) {
- if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
- PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
- PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
- } else {
- if (PPC::GPRCRegClass.contains(Reg) ||
- PPC::GPRC_NOR0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int4Spill;
- } else if (PPC::G8RCRegClass.contains(Reg) ||
- PPC::G8RC_NOX0RegClass.contains(Reg)) {
- OpcodeIndex = SOK_Int8Spill;
- } else if (PPC::F8RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float8Spill;
- } else if (PPC::F4RCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_Float4Spill;
- } else if (PPC::SPERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SPESpill;
- } else if (PPC::CRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRSpill;
- } else if (PPC::CRBITRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_CRBitSpill;
- } else if (PPC::VRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRVectorSpill;
- } else if (PPC::VSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VSXVectorSpill;
- } else if (PPC::VSFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat8Spill;
- } else if (PPC::VSSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VectorFloat4Spill;
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_VRSaveSpill;
- } else if (PPC::QFRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat8Spill;
- } else if (PPC::QSRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadFloat4Spill;
- } else if (PPC::QBRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_QuadBitSpill;
- } else if (PPC::SPILLTOVSRRCRegClass.contains(Reg)) {
- OpcodeIndex = SOK_SpillToVSR;
- } else {
- llvm_unreachable("Unknown regclass!");
- }
- }
- return OpcodesForSpill[OpcodeIndex];
+unsigned
+PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {
+ const unsigned *OpcodesForSpill = getLoadOpcodesForSpillArray();
+ return OpcodesForSpill[getSpillIndex(RC)];
}
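Hypothetical call sites after the refactor; the opcode now depends only on the register class (and, through the spill target, on whether Power9 vector instructions are available):

unsigned StoreOp = TII->getStoreOpcodeForSpill(&PPC::G8RCRegClass); // PPC::STD
unsigned LoadOp  = TII->getLoadOpcodeForSpill(&PPC::G8RCRegClass);  // PPC::LD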
void PPCInstrInfo::StoreRegToStackSlot(
MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const {
- unsigned Opcode = getStoreOpcodeForSpill(PPC::NoRegister, RC);
+ unsigned Opcode = getStoreOpcodeForSpill(RC);
DebugLoc DL;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1221,24 +1444,13 @@ void PPCInstrInfo::StoreRegToStackSlot(
FuncInfo->setHasNonRISpills();
}
-void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::storeRegToStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr *, 4> NewMIs;
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- RC = updatedRC(RC);
-
StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
@@ -1248,16 +1460,33 @@ void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+ storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
+}
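The remapping updatedRC performs here is the one that used to be open-coded (and is still visible in the lines removed from the reload path below); a sketch consistent with that logic:

// Map Altivec VRRC to VSX VSRC when VSX is available, so the spill and the
// reload agree on doubleword ordering.
const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) {
  return (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) ? &PPC::VSRCRegClass
                                                          : RC;
}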
+
void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs)
const {
- unsigned Opcode = getLoadOpcodeForSpill(PPC::NoRegister, RC);
+ unsigned Opcode = getLoadOpcodeForSpill(RC);
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
FrameIdx));
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1273,12 +1502,10 @@ void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
FuncInfo->setHasNonRISpills();
}
-void
-PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void PPCInstrInfo::loadRegFromStackSlotNoUpd(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
@@ -1287,16 +1514,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
- // We need to avoid a situation in which the value from a VRRC register is
- // spilled using an Altivec instruction and reloaded into a VSRC register
- // using a VSX instruction. The issue with this is that the VSX
- // load/store instructions swap the doublewords in the vector and the Altivec
- // ones don't. The register classes on the spill/reload may be different if
- // the register is defined using an Altivec instruction and is then used by a
- // VSX instruction.
- if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
- RC = &PPC::VSRCRegClass;
-
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
@@ -1306,10 +1523,27 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MFI.getObjectAlign(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
+void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+
+ loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
+}
+
bool PPCInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
@@ -1321,9 +1555,11 @@ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const {
- // For some instructions, it is legal to fold ZERO into the RA register field.
+// For some instructions, it is legal to fold ZERO into the RA register field.
+// This function performs that fold by replacing the operand with PPC::ZERO;
+// it does not check whether the defining load immediate has become unused.
+bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg) const {
// A zero immediate should always be loaded with a single li.
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
@@ -1343,6 +1579,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (UseMCID.isPseudo())
return false;
+ // Find which of the user's operands is to be folded: the one that matches
+ // the given register.
unsigned UseIdx;
for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
if (UseMI.getOperand(UseIdx).isReg() &&
@@ -1371,7 +1609,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (UseInfo->Constraints != 0)
return false;
- unsigned ZeroReg;
+ MCRegister ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
@@ -1380,13 +1618,19 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
PPC::ZERO8 : PPC::ZERO;
}
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.getOperand(UseIdx).setReg(ZeroReg);
+ return true;
+}
- if (DeleteDef)
+// Fold zero into instructions whose operand is defined by a load immediate
+// of zero and which treat the ZERO register as an immediate zero. If the
+// defining load immediate has no remaining users, it is deleted.
+bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg, MachineRegisterInfo *MRI) const {
+ bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
+ if (MRI->use_nodbg_empty(Reg))
DefMI.eraseFromParent();
-
- return true;
+ return Changed;
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
@@ -1423,17 +1667,6 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
return false;
}
-bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator())
- return false;
-
- // Conditional branch is a special case.
- if (MI.isBranch() && !MI.isBarrier())
- return true;
-
- return !isPredicated(MI);
-}
-
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
unsigned OpC = MI.getOpcode();
@@ -1587,8 +1820,8 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask,
+bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask,
int &Value) const {
unsigned Opc = MI.getOpcode();
@@ -1617,8 +1850,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
}
}
-bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const {
if (DisableCmpOpt)
return false;
@@ -1646,8 +1879,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
// Look through copies unless that gets us to a physical register.
- unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
- if (Register::isVirtualRegister(ActualSrc))
+ Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+ if (ActualSrc.isVirtual())
SrcReg = ActualSrc;
// Get the unique definition of SrcReg.
@@ -2036,8 +2269,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
- {MO_NLP_FLAG, "ppc-nlp"},
- {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}};
+ {MO_PCREL_FLAG, "ppc-pcrel"},
+ {MO_GOT_FLAG, "ppc-got"}};
return makeArrayRef(TargetFlags);
}
@@ -2330,7 +2563,8 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If we're in SSA, get the defs through the MRI. Otherwise, only look
- // within the basic block to see if the register is defined using an LI/LI8.
+ // within the basic block to see if the register is defined using an
+ // LI/LI8/ADDI/ADDI8.
if (MRI->isSSA()) {
for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
if (!MI.getOperand(i).isReg())
@@ -2341,9 +2575,16 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
if (Register::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
- if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
+ if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 ||
+ DefMI->getOpcode() == PPC::ADDI ||
+ DefMI->getOpcode() == PPC::ADDI8) {
OpNoForForwarding = i;
- break;
+          // Both an ADDI and an LI operand may feed the same instruction at
+          // the same time. We prefer to fold the LI operand, since LI has
+          // only one Imm operand and is more likely to be converted. So if
+          // the current DefMI is ADDI/ADDI8, keep looking for a possible
+          // LI/LI8.
+ if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
+ break;
}
}
}
@@ -2400,44 +2641,20 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
+unsigned PPCInstrInfo::getSpillTarget() const {
+ return Subtarget.hasP9Vector() ? 1 : 0;
+}
+
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {
- static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = {
- // Power 8
- {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
- PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX,
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST, PPC::EVSTDD},
- // Power 9
- {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR,
- PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32,
- PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb,
- PPC::SPILLTOVSR_ST}};
-
- return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
+ return StoreSpillOpcodesArray[getSpillTarget()];
}
const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
- static const unsigned OpcodesForSpill[2][SOK_LastOpcodeSpill] = {
- // Power 8
- {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
- PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX,
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD, PPC::EVLDD},
- // Power 9
- {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR,
- PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32,
- PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb,
- PPC::SPILLTOVSR_LD}};
-
- return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
+ return LoadSpillOpcodesArray[getSpillTarget()];
}
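// A minimal usage sketch (hypothetical caller, not in this patch): the
// returned array is indexed by SpillOpcodeKey, so a spill opcode lookup is
//   unsigned StoreOpc = getStoreOpcodesForSpillArray()[SOK_Int8Spill];
// which yields PPC::STD from both the Power8 and Power9 tables.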
void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
unsigned RegNo) const {
- const MachineRegisterInfo &MRI =
- StartMI.getParent()->getParent()->getRegInfo();
- if (MRI.isSSA())
- return;
// Instructions between [StartMI, EndMI] should be in the same basic block.
assert((StartMI.getParent() == EndMI.getParent()) &&
@@ -2588,6 +2805,13 @@ bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
return true;
return false;
};
+
+  // We are trying to replace the operand at ImmOpNo with ScaleReg. Give up if
+  // ScaleReg is the R0/X0 register and that operand position treats register
+  // zero as a special immediate zero.
+ if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
+ (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
+ return false;
+
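// Why this guard matters (illustrative): in an X-form access such as
//   LWZX %rt, %ra, %rb
// the hardware reads %ra as the literal value 0 when it is R0/X0, so
// substituting ScaleReg into an operand slot marked ZeroIsSpecialOrig would
// silently change the address computation.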
// Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
// and Imm Instr.
if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
@@ -2631,6 +2855,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
+  // The operand is not necessarily an immediate - it could be a relocation.
+ if (!ADDIMI.getOperand(2).isImm())
+ return false;
+
Imm = ADDIMI.getOperand(2).getImm();
return true;
@@ -2746,10 +2974,16 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
"The forwarding operand needs to be valid at this point");
bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
- Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
+  // If this is an imm-form instruction and its register operand is produced
+  // by an ADDI, put the imm into the imm instruction directly.
+ if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
+ PPC::INSTRUCTION_LIST_END &&
+ transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
+ return true;
+
ImmInstrInfo III;
bool IsVFReg = MI.getOperand(0).isReg()
? isVFRegister(MI.getOperand(0).getReg())
@@ -2763,228 +2997,17 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
KillFwdDefMI))
return true;
- if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
- !DefMI->getOperand(1).isImm())
- return false;
-
- int64_t Immediate = DefMI->getOperand(1).getImm();
- // Sign-extend to 64-bits.
- int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
- (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
-
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by LI, convert it now.
- if (HasImmForm)
- return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
-
- bool ReplaceWithLI = false;
- bool Is64BitLI = false;
- int64_t NewImm = 0;
- bool SetCR = false;
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default: return false;
-
- // FIXME: Any branches conditional on such a comparison can be made
- // unconditional. At this time, this happens too infrequently to be worth
- // the implementation effort, but if that ever changes, we could convert
- // such a pattern here.
- case PPC::CMPWI:
- case PPC::CMPLWI:
- case PPC::CMPDI:
- case PPC::CMPLDI: {
- // Doing this post-RA would require dataflow analysis to reliably find uses
- // of the CR register set by the compare.
- // No need to fixup killed/dead flag since this transformation is only valid
- // before RA.
- if (PostRA)
- return false;
- // If a compare-immediate is fed by an immediate and is itself an input of
- // an ISEL (the most common case) into a COPY of the correct register.
- bool Changed = false;
- Register DefReg = MI.getOperand(0).getReg();
- int64_t Comparand = MI.getOperand(2).getImm();
- int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ?
- (Comparand | 0xFFFFFFFFFFFF0000) : Comparand;
-
- for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
- unsigned UseOpc = CompareUseMI.getOpcode();
- if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
- continue;
- unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
- Register TrueReg = CompareUseMI.getOperand(1).getReg();
- Register FalseReg = CompareUseMI.getOperand(2).getReg();
- unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg,
- FalseReg, CRSubReg);
- if (RegToCopy == PPC::NoRegister)
- continue;
- // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
- if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
- CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
- replaceInstrOperandWithImm(CompareUseMI, 1, 0);
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(2);
- continue;
- }
- LLVM_DEBUG(
- dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
- LLVM_DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump());
- LLVM_DEBUG(dbgs() << "Is converted to:\n");
- // Convert to copy and remove unneeded operands.
- CompareUseMI.setDesc(get(PPC::COPY));
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
- CmpIselsConverted++;
- Changed = true;
- LLVM_DEBUG(CompareUseMI.dump());
- }
- if (Changed)
- return true;
- // This may end up incremented multiple times since this function is called
- // during a fixed-point transformation, but it is only meant to indicate the
- // presence of this opportunity.
- MissedConvertibleImmediateInstrs++;
- return false;
- }
-
- // Immediate forms - may simply be convertable to an LI.
- case PPC::ADDI:
- case PPC::ADDI8: {
- // Does the sum fit in a 16-bit signed field?
- int64_t Addend = MI.getOperand(2).getImm();
- if (isInt<16>(Addend + SExtImm)) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::ADDI8;
- NewImm = Addend + SExtImm;
- break;
- }
- return false;
- }
- case PPC::RLDICL:
- case PPC::RLDICL_rec:
- case PPC::RLDICL_32:
- case PPC::RLDICL_32_64: {
- // Use APInt's rotate function.
- int64_t SH = MI.getOperand(2).getImm();
- int64_t MB = MI.getOperand(3).getImm();
- APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
- SExtImm, true);
- InVal = InVal.rotl(SH);
- uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
- InVal &= Mask;
- // Can't replace negative values with an LI as that will sign-extend
- // and not clear the left bits. If we're setting the CR bit, we will use
- // ANDI_rec which won't sign extend, so that's safe.
- if (isUInt<15>(InVal.getSExtValue()) ||
- (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
- ReplaceWithLI = true;
- Is64BitLI = Opc != PPC::RLDICL_32;
- NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLDICL_rec;
- break;
- }
- return false;
- }
- case PPC::RLWINM:
- case PPC::RLWINM8:
- case PPC::RLWINM_rec:
- case PPC::RLWINM8_rec: {
- int64_t SH = MI.getOperand(2).getImm();
- int64_t MB = MI.getOperand(3).getImm();
- int64_t ME = MI.getOperand(4).getImm();
- APInt InVal(32, SExtImm, true);
- InVal = InVal.rotl(SH);
- // Set the bits ( MB + 32 ) to ( ME + 32 ).
- uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
- InVal &= Mask;
- // Can't replace negative values with an LI as that will sign-extend
- // and not clear the left bits. If we're setting the CR bit, we will use
- // ANDI_rec which won't sign extend, so that's safe.
- bool ValueFits = isUInt<15>(InVal.getSExtValue());
- ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
- isUInt<16>(InVal.getSExtValue()));
- if (ValueFits) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
- NewImm = InVal.getSExtValue();
- SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
- break;
- }
- return false;
- }
- case PPC::ORI:
- case PPC::ORI8:
- case PPC::XORI:
- case PPC::XORI8: {
- int64_t LogicalImm = MI.getOperand(2).getImm();
- int64_t Result = 0;
- if (Opc == PPC::ORI || Opc == PPC::ORI8)
- Result = LogicalImm | SExtImm;
- else
- Result = LogicalImm ^ SExtImm;
- if (isInt<16>(Result)) {
- ReplaceWithLI = true;
- Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
- NewImm = Result;
- break;
- }
- return false;
- }
- }
-
- if (ReplaceWithLI) {
- // We need to be careful with CR-setting instructions we're replacing.
- if (SetCR) {
- // We don't know anything about uses when we're out of SSA, so only
- // replace if the new immediate will be reproduced.
- bool ImmChanged = (SExtImm & NewImm) != NewImm;
- if (PostRA && ImmChanged)
- return false;
-
- if (!PostRA) {
- // If the defining load-immediate has no other uses, we can just replace
- // the immediate with the new immediate.
- if (MRI->hasOneUse(DefMI->getOperand(0).getReg()))
- DefMI->getOperand(1).setImm(NewImm);
-
- // If we're not using the GPR result of the CR-setting instruction, we
- // just need to and with zero/non-zero depending on the new immediate.
- else if (MRI->use_empty(MI.getOperand(0).getReg())) {
- if (NewImm) {
- assert(Immediate && "Transformation converted zero to non-zero?");
- NewImm = Immediate;
- }
- }
- else if (ImmChanged)
- return false;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
- LLVM_DEBUG(MI.dump());
- LLVM_DEBUG(dbgs() << "Fed by:\n");
- LLVM_DEBUG(DefMI->dump());
- LoadImmediateInfo LII;
- LII.Imm = NewImm;
- LII.Is64Bit = Is64BitLI;
- LII.SetCR = SetCR;
- // If we're setting the CR, the original load-immediate must be kept (as an
- // operand to ANDI_rec/ANDI8_rec).
- if (KilledDef && SetCR)
- *KilledDef = nullptr;
- replaceInstrWithLI(MI, LII);
-
- // Fixup killed/dead flag after transformation.
- // Pattern:
- // ForwardingOperandReg = LI imm1
- // y = op2 imm2, ForwardingOperandReg(killed)
- if (IsForwardingOperandKilled)
- fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+ if (HasImmForm &&
+ transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
+ return true;
- LLVM_DEBUG(dbgs() << "With:\n");
- LLVM_DEBUG(MI.dump());
+  // If this is not a reg+reg instruction, but the DefMI is LI/LI8, check if
+  // its user MI can be simplified to an LI.
+ if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
return true;
- }
+
return false;
}
@@ -3501,6 +3524,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
RegMO = &DefMI.getOperand(1);
ImmMO = &DefMI.getOperand(2);
+  // Before RA, the first operand of ADDI could be a frame index.
+ if (!RegMO->isReg())
+ return false;
+
// This DefMI is eligible for forwarding if it is:
// 1. add inst
// 2. one of the operands is Imm/CPI/Global.
@@ -3549,7 +3576,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
const MachineInstr &DefMI,
const ImmInstrInfo &III,
- int64_t &Imm) const {
+ int64_t &Imm,
+ int64_t BaseImm) const {
assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
if (DefMI.getOpcode() == PPC::ADDItocL) {
// The operand for ADDItocL is CPI, which isn't an imm at compile time,
@@ -3563,19 +3591,21 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
// not just an immediate but also a multiple of 4, or 16 depending on the
// load. A DForm load cannot be represented if it is a multiple of say 2.
// XForm loads do not have this restriction.
- if (ImmMO.isGlobal() &&
- ImmMO.getGlobal()->getAlignment() < III.ImmMustBeMultipleOf)
- return false;
+ if (ImmMO.isGlobal()) {
+ const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
+ if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
+ return false;
+ }
return true;
}
if (ImmMO.isImm()) {
// It is Imm, we need to check if the Imm fit the range.
- int64_t Immediate = ImmMO.getImm();
// Sign-extend to 64-bits.
- Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ?
- (Immediate | 0xFFFFFFFFFFFF0000) : Immediate;
+    // DefMI may be folded with another imm-form instruction; the resulting
+    // Imm is the sum of DefMI's Imm and BaseImm, which comes from the
+    // imm-form instruction.
+ Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
if (Imm % III.ImmMustBeMultipleOf)
return false;
@@ -3599,6 +3629,328 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
return true;
}
+bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding,
+ MachineInstr **KilledDef) const {
+ if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
+ !DefMI.getOperand(1).isImm())
+ return false;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+
+ int64_t Immediate = DefMI.getOperand(1).getImm();
+ // Sign-extend to 64-bits.
+ int64_t SExtImm = SignExtend64<16>(Immediate);
+
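// A standalone check, runnable if extracted (a sketch, not part of this
// patch), that SignExtend64<16> matches the manual masking it replaces:
//   #include <cassert>
//   #include <cstdint>
//   int64_t manualSExt16(int64_t Imm) {
//     return ((uint64_t)Imm & ~0x7FFFuLL) != 0 ? (Imm | 0xFFFFFFFFFFFF0000)
//                                              : Imm;
//   }
//   int main() {
//     for (int64_t I = 0; I <= 0xFFFF; ++I)
//       assert(manualSExt16(I) == (int64_t)(int16_t)I); // SignExtend64<16>(I)
//   }
// The two agree on every 16-bit payload an LI/LI8 can carry.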
+ bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill();
+ Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
+ bool ReplaceWithLI = false;
+ bool Is64BitLI = false;
+ int64_t NewImm = 0;
+ bool SetCR = false;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+
+ // FIXME: Any branches conditional on such a comparison can be made
+ // unconditional. At this time, this happens too infrequently to be worth
+ // the implementation effort, but if that ever changes, we could convert
+ // such a pattern here.
+ case PPC::CMPWI:
+ case PPC::CMPLWI:
+ case PPC::CMPDI:
+ case PPC::CMPLDI: {
+ // Doing this post-RA would require dataflow analysis to reliably find uses
+ // of the CR register set by the compare.
+ // No need to fixup killed/dead flag since this transformation is only valid
+ // before RA.
+ if (PostRA)
+ return false;
+  // If a compare-immediate is fed by an immediate and is itself an input of
+  // an ISEL (the most common case), convert the ISEL into a COPY of the
+  // correct register.
+ bool Changed = false;
+ Register DefReg = MI.getOperand(0).getReg();
+ int64_t Comparand = MI.getOperand(2).getImm();
+ int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
+ ? (Comparand | 0xFFFFFFFFFFFF0000)
+ : Comparand;
+
+ for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
+ unsigned UseOpc = CompareUseMI.getOpcode();
+ if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
+ continue;
+ unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
+ Register TrueReg = CompareUseMI.getOperand(1).getReg();
+ Register FalseReg = CompareUseMI.getOperand(2).getReg();
+ unsigned RegToCopy =
+ selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
+ if (RegToCopy == PPC::NoRegister)
+ continue;
+ // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
+ if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
+ CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
+ replaceInstrOperandWithImm(CompareUseMI, 1, 0);
+ CompareUseMI.RemoveOperand(3);
+ CompareUseMI.RemoveOperand(2);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
+ LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
+ LLVM_DEBUG(dbgs() << "Is converted to:\n");
+ // Convert to copy and remove unneeded operands.
+ CompareUseMI.setDesc(get(PPC::COPY));
+ CompareUseMI.RemoveOperand(3);
+ CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
+ CmpIselsConverted++;
+ Changed = true;
+ LLVM_DEBUG(CompareUseMI.dump());
+ }
+ if (Changed)
+ return true;
+ // This may end up incremented multiple times since this function is called
+ // during a fixed-point transformation, but it is only meant to indicate the
+ // presence of this opportunity.
+ MissedConvertibleImmediateInstrs++;
+ return false;
+ }
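// Illustrative before/after for this case (not from the patch): with
//   %0 = LI 10
//   %1 = CMPWI %0, 42
//   %2 = ISEL %t, %f, %1.sub_lt
// the compare outcome is known at compile time (10 < 42 is true), so the
// ISEL collapses to
//   %2 = COPY %t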
+
+  // Immediate forms - may simply be convertible to an LI.
+ case PPC::ADDI:
+ case PPC::ADDI8: {
+ // Does the sum fit in a 16-bit signed field?
+ int64_t Addend = MI.getOperand(2).getImm();
+ if (isInt<16>(Addend + SExtImm)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::ADDI8;
+ NewImm = Addend + SExtImm;
+ break;
+ }
+ return false;
+ }
+ case PPC::RLDICL:
+ case PPC::RLDICL_rec:
+ case PPC::RLDICL_32:
+ case PPC::RLDICL_32_64: {
+ // Use APInt's rotate function.
+ int64_t SH = MI.getOperand(2).getImm();
+ int64_t MB = MI.getOperand(3).getImm();
+ APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
+ SExtImm, true);
+ InVal = InVal.rotl(SH);
+ uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
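// Worked reading of the MB == 0 guard (illustrative arithmetic): without it,
// MB == 0 computes 1LLU << 64, which is undefined behavior in C++, while the
// intended mask is all ones. At the other extreme, MB == 63 yields
// (1LLU << 1) - 1 == 0x1, keeping only the low bit as expected.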
+ InVal &= Mask;
+ // Can't replace negative values with an LI as that will sign-extend
+ // and not clear the left bits. If we're setting the CR bit, we will use
+ // ANDI_rec which won't sign extend, so that's safe.
+ if (isUInt<15>(InVal.getSExtValue()) ||
+ (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc != PPC::RLDICL_32;
+ NewImm = InVal.getSExtValue();
+ SetCR = Opc == PPC::RLDICL_rec;
+ break;
+ }
+ return false;
+ }
+ case PPC::RLWINM:
+ case PPC::RLWINM8:
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8_rec: {
+ int64_t SH = MI.getOperand(2).getImm();
+ int64_t MB = MI.getOperand(3).getImm();
+ int64_t ME = MI.getOperand(4).getImm();
+ APInt InVal(32, SExtImm, true);
+ InVal = InVal.rotl(SH);
+ APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
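// getBitsSetWithWrap also covers wrapping masks (MB > ME) that a plain
// shift-based expression cannot represent. Illustrative: MB = 24, ME = 7
// gives getBitsSetWithWrap(32, 24, 8), where loBit > hiBit, setting bits
// [24, 32) and [0, 8) of the rotated value.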
+ InVal &= Mask;
+ // Can't replace negative values with an LI as that will sign-extend
+ // and not clear the left bits. If we're setting the CR bit, we will use
+ // ANDI_rec which won't sign extend, so that's safe.
+ bool ValueFits = isUInt<15>(InVal.getSExtValue());
+ ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
+ isUInt<16>(InVal.getSExtValue()));
+ if (ValueFits) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
+ NewImm = InVal.getSExtValue();
+ SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
+ break;
+ }
+ return false;
+ }
+ case PPC::ORI:
+ case PPC::ORI8:
+ case PPC::XORI:
+ case PPC::XORI8: {
+ int64_t LogicalImm = MI.getOperand(2).getImm();
+ int64_t Result = 0;
+ if (Opc == PPC::ORI || Opc == PPC::ORI8)
+ Result = LogicalImm | SExtImm;
+ else
+ Result = LogicalImm ^ SExtImm;
+ if (isInt<16>(Result)) {
+ ReplaceWithLI = true;
+ Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
+ NewImm = Result;
+ break;
+ }
+ return false;
+ }
+ }
+
+ if (ReplaceWithLI) {
+ // We need to be careful with CR-setting instructions we're replacing.
+ if (SetCR) {
+ // We don't know anything about uses when we're out of SSA, so only
+ // replace if the new immediate will be reproduced.
+ bool ImmChanged = (SExtImm & NewImm) != NewImm;
+ if (PostRA && ImmChanged)
+ return false;
+
+ if (!PostRA) {
+ // If the defining load-immediate has no other uses, we can just replace
+ // the immediate with the new immediate.
+ if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
+ DefMI.getOperand(1).setImm(NewImm);
+
+ // If we're not using the GPR result of the CR-setting instruction, we
+ // just need to and with zero/non-zero depending on the new immediate.
+ else if (MRI->use_empty(MI.getOperand(0).getReg())) {
+ if (NewImm) {
+ assert(Immediate && "Transformation converted zero to non-zero?");
+ NewImm = Immediate;
+ }
+ } else if (ImmChanged)
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+ LoadImmediateInfo LII;
+ LII.Imm = NewImm;
+ LII.Is64Bit = Is64BitLI;
+ LII.SetCR = SetCR;
+ // If we're setting the CR, the original load-immediate must be kept (as an
+ // operand to ANDI_rec/ANDI8_rec).
+ if (KilledDef && SetCR)
+ *KilledDef = nullptr;
+ replaceInstrWithLI(MI, LII);
+
+ // Fixup killed/dead flag after transformation.
+ // Pattern:
+ // ForwardingOperandReg = LI imm1
+ // y = op2 imm2, ForwardingOperandReg(killed)
+ if (IsForwardingOperandKilled)
+ fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+ return true;
+ }
+ return false;
+}
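// A hedged end-to-end example of the ADDI case above (illustrative MIR):
//   %0 = LI 100
//   %1 = ADDI killed %0, 200      ; 100 + 200 fits in a signed 16-bit field
// simplifyToLI rewrites the user to
//   %1 = LI 300
// and the now-dead defining LI can then be removed by the caller.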
+
+bool PPCInstrInfo::transformToNewImmFormFedByAdd(
+ MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
+ MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+ bool PostRA = !MRI->isSSA();
+  // FIXME: extend this to post-RA. That requires some changes in
+  // getForwardingDefMI for post-RA.
+ if (PostRA)
+ return false;
+
+ // Only handle load/store.
+ if (!MI.mayLoadOrStore())
+ return false;
+
+ unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
+
+ assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
+ "MI must have x-form opcode");
+
+  // Get the imm-form info.
+ ImmInstrInfo III;
+ bool IsVFReg = MI.getOperand(0).isReg()
+ ? isVFRegister(MI.getOperand(0).getReg())
+ : false;
+
+ if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
+ return false;
+
+ if (!III.IsSummingOperands)
+ return false;
+
+ if (OpNoForForwarding != III.OpNoForForwarding)
+ return false;
+
+ MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
+ if (!ImmOperandMI.isImm())
+ return false;
+
+ // Check DefMI.
+ MachineOperand *ImmMO = nullptr;
+ MachineOperand *RegMO = nullptr;
+ if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
+ return false;
+ assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
+
+ // Check Imm.
+  // Pass the imm-form instruction's immediate as the base (ImmBase); the
+  // combined Imm is computed inside isImmElgibleForForwarding.
+ int64_t ImmBase = ImmOperandMI.getImm();
+ int64_t Imm = 0;
+ if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
+ return false;
+
+ // Get killed info in case fixup needed after transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ if (MI.getOperand(III.OpNoForForwarding).isKill())
+ ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg();
+
+ // Do the transform
+ LLVM_DEBUG(dbgs() << "Replacing instruction:\n");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Fed by:\n");
+ LLVM_DEBUG(DefMI.dump());
+
+ MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
+ MI.getOperand(III.OpNoForForwarding).setIsKill(RegMO->isKill());
+ MI.getOperand(III.ImmOpNo).setImm(Imm);
+
+  // FIXME: fix kill/dead flag if MI and DefMI are not in the same basic block.
+ if (DefMI.getParent() == MI.getParent()) {
+ // Check if reg is killed between MI and DefMI.
+ auto IsKilledFor = [&](unsigned Reg) {
+ MachineBasicBlock::const_reverse_iterator It = MI;
+ MachineBasicBlock::const_reverse_iterator E = DefMI;
+ It++;
+ for (; It != E; ++It) {
+ if (It->killsRegister(Reg))
+ return true;
+ }
+ return false;
+ };
+
+ // Update kill flag
+ if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
+ fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ }
+
+ LLVM_DEBUG(dbgs() << "With:\n");
+ LLVM_DEBUG(MI.dump());
+ return true;
+}
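// A hedged sketch of the transform above (illustrative MIR, assuming the
// load's imm form has IsSummingOperands set):
//   %0 = ADDI %base, 8
//   %1 = LWZ 16, %0               ; D-form load, displacement 16
// becomes
//   %1 = LWZ 24, %base            ; displacements summed: 16 + 8
// with Imm = SignExtend64<16>(8 + 16) computed in isImmElgibleForForwarding.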
+
// If an X-Form instruction is fed by an add-immediate and one of its operands
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
@@ -3718,8 +4070,15 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
const ImmInstrInfo &III,
unsigned ConstantOpNo,
- MachineInstr &DefMI,
- int64_t Imm) const {
+ MachineInstr &DefMI) const {
+ // DefMI must be LI or LI8.
+ if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
+ !DefMI.getOperand(1).isImm())
+ return false;
+
+ // Get Imm operand and Sign-extend to 64-bits.
+ int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
+
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
// Exit early if we can't convert this.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 2fe8df0e1d68b..d98597f483406 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,9 @@ enum {
NewDef_Shift = 6,
/// This instruction is an X-Form memory operation.
- XFormMemOp = 0x1 << (NewDef_Shift+1)
+ XFormMemOp = 0x1 << NewDef_Shift,
+ /// This instruction is prefixed.
+ Prefixed = 0x1 << (NewDef_Shift+1)
};
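// A worked reading of the shifts above (not text from the patch): with
// NewDef_Shift == 6, XFormMemOp == 1 << 6 == 0x40 and Prefixed == 1 << 7 ==
// 0x80, so the two flags occupy distinct TSFlags bits.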
} // end namespace PPCII
@@ -108,10 +110,74 @@ struct LoadImmediateInfo {
unsigned SetCR : 1;
};
+// Index into the OpcodesForSpill array.
+enum SpillOpcodeKey {
+ SOK_Int4Spill,
+ SOK_Int8Spill,
+ SOK_Float8Spill,
+ SOK_Float4Spill,
+ SOK_CRSpill,
+ SOK_CRBitSpill,
+ SOK_VRVectorSpill,
+ SOK_VSXVectorSpill,
+ SOK_VectorFloat8Spill,
+ SOK_VectorFloat4Spill,
+ SOK_VRSaveSpill,
+ SOK_QuadFloat8Spill,
+ SOK_QuadFloat4Spill,
+ SOK_QuadBitSpill,
+ SOK_SpillToVSR,
+ SOK_SPESpill,
+ SOK_LastOpcodeSpill // This must be last on the enum.
+};
+
+// Define list of load and store spill opcodes.
+#define Pwr8LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
+ PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, \
+ PPC::SPILLTOVSR_LD, PPC::EVLDD \
+ }
+
+#define Pwr9LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
+ PPC::DFLOADf32, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, \
+ PPC::QVLFDXb, PPC::SPILLTOVSR_LD \
+ }
+
+#define Pwr8StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, \
+ PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, PPC::SPILLTOVSR_ST, \
+ PPC::EVSTDD \
+ }
+
+#define Pwr9StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
+ PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, \
+ PPC::SPILLTOVSR_ST \
+ }
+
+// Initialize arrays for load and store spill opcodes on supported subtargets.
+#define StoreOpcodesForSpill \
+ { Pwr8StoreOpcodes, Pwr9StoreOpcodes }
+#define LoadOpcodesForSpill \
+ { Pwr8LoadOpcodes, Pwr9LoadOpcodes }
+
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
+ const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ StoreOpcodesForSpill;
+ const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
+ LoadOpcodesForSpill;
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
int FrameIdx, const TargetRegisterClass *RC,
@@ -121,13 +187,21 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr *> &NewMIs) const;
- // If the inst has imm-form and one of its operand is produced by a LI,
- // put the imm into the inst directly and remove the LI if possible.
+  // Replace the instruction with a single LI if possible. \p DefMI must be LI
+  // or LI8.
+ bool simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding, MachineInstr **KilledDef) const;
+  // If the inst is imm-form and its register operand is produced by an ADDI,
+  // put the imm into the inst directly and remove the ADDI if possible.
+ bool transformToNewImmFormFedByAdd(MachineInstr &MI, MachineInstr &DefMI,
+ unsigned OpNoForForwarding) const;
+  // If the inst is x-form and has imm-form and one of its operands is produced
+  // by an LI, put the imm into the inst directly and remove the LI if possible.
bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, MachineInstr &DefMI,
- int64_t Imm) const;
- // If the inst has imm-form and one of its operand is produced by an
- // add-immediate, try to transform it when possible.
+ unsigned ConstantOpNo,
+ MachineInstr &DefMI) const;
+  // If the inst is x-form and has imm-form and one of its operands is produced
+  // by an add-immediate, try to transform it when possible.
bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
unsigned ConstantOpNo, MachineInstr &DefMI,
bool KillDefMI) const;
@@ -151,13 +225,20 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isImmElgibleForForwarding(const MachineOperand &ImmMO,
const MachineInstr &DefMI,
const ImmInstrInfo &III,
- int64_t &Imm) const;
+ int64_t &Imm,
+ int64_t BaseImm = 0) const;
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
bool &IsFwdFeederRegKilled) const;
+ unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
+ int16_t getFMAOpIdxInfo(unsigned Opcode) const;
+ void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
virtual void anchor();
protected:
@@ -187,6 +268,10 @@ public:
bool isXFormMemOp(unsigned Opcode) const {
return get(Opcode).TSFlags & PPCII::XFormMemOp;
}
+ bool isPrefixed(unsigned Opcode) const {
+ return get(Opcode).TSFlags & PPCII::Prefixed;
+ }
+
static bool isSameClassPhysRegCopy(unsigned Opcode) {
unsigned CopyOpcodes[] =
{ PPC::OR, PPC::OR8, PPC::FMR, PPC::VOR, PPC::XXLOR, PPC::XXLORf,
@@ -233,6 +318,20 @@ public:
return true;
}
+  /// When getMachineCombinerPatterns() finds patterns, this function generates
+  /// the instructions that could replace the original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+  /// Return true when there is potentially a faster code sequence for an FMA
+  /// chain ending in \p Root. All potential patterns are output in the
+  /// \p P array.
+ bool getFMAPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const;
+
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
@@ -242,8 +341,24 @@ public:
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+  /// On PowerPC, we try to reassociate FMA chains, which will increase
+  /// instruction size. Set the extension resource length limit to 1 for this
+  /// edge case. Resource length is calculated from scaled resource usage in
+  /// getCycles(). Because of the division in getCycles(), it can return
+  /// different cycle counts under legacy scaled resource usage, so the new
+  /// resource length may be the same as the legacy one, or bigger than it
+  /// by 1. We need to tolerate the bigger-by-1 case, even though the resource
+  /// length is not preserved, to allow more FMA chain reassociations on
+  /// PowerPC.
+ int getExtendResourceLenLimit() const override { return 1; }
+
+ void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const override;
+
+ void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
+ Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -273,11 +388,12 @@ public:
// Select analysis.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
@@ -285,28 +401,47 @@ public:
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // Emits a register spill without updating the register class for vector
+ // registers. This ensures that when we spill a vector register the
+ // element order in the register is the same as it was in memory.
+ void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- unsigned getStoreOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC = nullptr) const;
+ // Emits a register reload without updating the register class for vector
+ // registers. This ensures that when we reload a vector register the
+ // element order in the register is the same as it was in memory.
+ void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const;
- unsigned getLoadOpcodeForSpill(unsigned Reg,
- const TargetRegisterClass *RC = nullptr) const;
+ unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const;
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
+ bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ Register Reg) const;
+
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.
@@ -335,8 +470,6 @@ public:
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
- bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
-
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
@@ -348,11 +481,11 @@ public:
// Comparison optimization.
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const override;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const override;
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b38ca3af63f56..673ab63039cf7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -37,9 +37,6 @@ def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
-def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [
- SDTCisVec<0>, SDTCisVec<1>
-]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -53,6 +50,10 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisInt<2>
]>;
+def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>,
+ SDTCisInt<1>
+]>;
+
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
@@ -151,19 +152,19 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
[SDNPHasChain, SDNPMayStore]>;
def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
-def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
- SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPHasChain]>;
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-def PPCfsel : SDNode<"PPCISD::FSEL",
+def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
@@ -171,8 +172,6 @@ def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
[SDNPMayLoad, SDNPMemOperand]>;
-def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
-def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
@@ -199,6 +198,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
@@ -221,6 +221,8 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>;
+
def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
// Move 2 i64 values into a VSX register
@@ -255,6 +257,9 @@ def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -318,11 +323,32 @@ def SDTDynOp : SDTypeProfile<1, 2, []>;
def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
+def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>;
+
+// PC Relative Specific Nodes
+def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
//
+// A floating point immediate that is not a positive zero and can be converted
+// to a single precision floating point non-denormal immediate without loss of
+// information.
+def nzFPImmAsi32 : PatLeaf<(fpimm), [{
+ APFloat APFloatOfN = N->getValueAPF();
+ return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
+}]>;
+
+// Convert the floating point immediate into a 32 bit floating point immediate
+// and get a i32 with the resulting bits.
+def getFPAs32BitInt : SDNodeXForm<fpimm, [{
+ APFloat APFloatOfN = N->getValueAPF();
+ convertToNonDenormSingle(APFloatOfN);
+ return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(),
+ SDLoc(N), MVT::i32);
+}]>;
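// A standalone C++ sketch of the losslessness test these helpers rely on
// (assumption: convertToNonDenormSingle behaves like an APFloat conversion
// to IEEE single that reports whether information was lost):
//   #include "llvm/ADT/APFloat.h"
//   static bool fitsInSingle(const llvm::APFloat &V) {
//     llvm::APFloat Tmp = V;
//     bool LosesInfo = false;
//     (void)Tmp.convert(llvm::APFloat::IEEEsingle(),
//                       llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
//     return !LosesInfo;
//   }
// nzFPImmAsi32 additionally rejects +0.0, and getFPAs32BitInt then emits the
// converted value's raw bits as an i32 target constant.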
+
def SHL32 : SDNodeXForm<imm, [{
// Transformation function: 31 - imm
return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
@@ -386,9 +412,10 @@ def immZExt16 : PatLeaf<(imm), [{
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
+def immNonAllOneAnyExt8 : ImmLeaf<i32, [{
return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
}]>;
+def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>;
def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
@@ -404,7 +431,7 @@ def imm16ShiftedZExt : PatLeaf<(imm), [{
def imm16ShiftedSExt : PatLeaf<(imm), [{
// imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
- // immediate are set. Used by instructions like 'addis'. Identical to
+ // immediate are set. Used by instructions like 'addis'. Identical to
// imm16ShiftedZExt in 32-bit mode.
if (N->getZExtValue() & 0xFFFF) return false;
if (N->getValueType(0) == MVT::i32)
@@ -723,6 +750,27 @@ def s17imm : Operand<i32> {
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
}
+def PPCS34ImmAsmOperand : AsmOperandClass {
+ let Name = "S34Imm";
+ let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s34imm : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34Encoding";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+}
+def PPCImmZeroAsmOperand : AsmOperandClass {
+ let Name = "ImmZero";
+ let PredicateMethod = "isImmZero";
+ let RenderMethod = "addImmOperands";
+}
+def immZero : Operand<i32> {
+ let PrintMethod = "printImmZeroOperand";
+ let ParserMatchClass = PPCImmZeroAsmOperand;
+ let DecoderMethod = "decodeImmZeroOperand";
+}
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
@@ -733,7 +781,9 @@ def PPCDirectBrAsmOperand : AsmOperandClass {
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def absdirectbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
@@ -747,7 +797,9 @@ def PPCCondBrAsmOperand : AsmOperandClass {
def condbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getCondBrEncoding";
+ let DecoderMethod = "decodeCondBrTarget";
let ParserMatchClass = PPCCondBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def abscondbrtarget : Operand<OtherVT> {
let PrintMethod = "printAbsBranchOperand";
@@ -757,7 +809,7 @@ def abscondbrtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
- let DecoderMethod = "DecodePCRel24BranchTarget";
+ let DecoderMethod = "decodeDirectBrTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
let OperandType = "OPERAND_PCREL";
}
@@ -783,6 +835,30 @@ def PPCRegGxRCNoR0Operand : AsmOperandClass {
def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
let ParserMatchClass = PPCRegGxRCNoR0Operand;
}
+
+// New addressing modes with 34 bit immediates.
+def PPCDispRI34Operand : AsmOperandClass {
+ let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def dispRI34 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRI34Operand;
+}
+def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34";
+ let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRI34Encoding";
+ let DecoderMethod = "decodeMemRI34Operands";
+}
+// memri, imm is a 34-bit value for pc-relative instructions where the
+// base register is set to zero.
+def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34PCRel";
+ let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
+ let EncoderMethod = "getMemRI34PCRelEncoding";
+ let DecoderMethod = "decodeMemRI34PCRelOperands";
+}
+
// A version of ptr_rc usable with the asm parser.
def PPCRegGxRCOperand : AsmOperandClass {
let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
@@ -876,7 +952,7 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
}
// A single-register address. This is used with the SjLj
-// pseudo-instructions which tranlates to LD/LWZ. These instructions requires
+// pseudo-instructions which translate to LD/LWZ. These instructions require
// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
@@ -913,11 +989,11 @@ def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// Below forms are all x-form addressing mode, use three different ones so we
// can make a accurate check for x-form instructions in ISEL.
-// x-form addressing mode whose associated diplacement form is D.
+// x-form addressing mode whose associated displacement form is D.
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
-// x-form addressing mode whose associated diplacement form is DS.
+// x-form addressing mode whose associated displacement form is DS.
def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
-// x-form addressing mode whose associated diplacement form is DQ.
+// x-form addressing mode whose associated displacement form is DQ.
def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
@@ -929,26 +1005,32 @@ def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+// PC Relative Address
+def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
-def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
-def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
-def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
-def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
-def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
-def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
-def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
-def IsE500 : Predicate<"PPCSubTarget->isE500()">;
-def HasSPE : Predicate<"PPCSubTarget->hasSPE()">;
-def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
-def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
-def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
-def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
-def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
-def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
-def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">;
-def HasFPU : Predicate<"PPCSubTarget->hasFPU()">;
+def In32BitMode : Predicate<"!Subtarget->isPPC64()">;
+def In64BitMode : Predicate<"Subtarget->isPPC64()">;
+def IsBookE : Predicate<"Subtarget->isBookE()">;
+def IsNotBookE : Predicate<"!Subtarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"Subtarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!Subtarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"Subtarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"Subtarget->isPPC6xx()">;
+def IsE500 : Predicate<"Subtarget->isE500()">;
+def HasSPE : Predicate<"Subtarget->hasSPE()">;
+def HasICBT : Predicate<"Subtarget->hasICBT()">;
+def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">;
+def NoNaNsFPMath
+ : Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
+def NaNsFPMath
+ : Predicate<"!Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"Subtarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
+def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
+def HasFPU : Predicate<"Subtarget->hasFPU()">;
+def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -1318,7 +1400,20 @@ def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
[(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
-
+// Probed alloca to support stack clash protection.
+let Defs = [R1], Uses = [R1], hasNoSchedulingInfo = 1 in {
+def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
+ (ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
+ [(set i32:$result,
+ (PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
+def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
+ gprc:$sp),
+ (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
+ (ins i64imm:$stacksize),
+ "#PROBED_STACKALLOC_32", []>;
+}
+
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let PPC970_Single = 1 in {
@@ -1412,7 +1507,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
// Set the float rounding mode.
-let Uses = [RM], Defs = [RM] in {
+let Uses = [RM], Defs = [RM] in {
def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
"#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
@@ -1724,6 +1819,8 @@ def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
PPC970_DGroup_Single;
+def : InstAlias<"rfebb", (RFEBB 1)>;
+
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1777,6 +1874,11 @@ def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
(ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read)
+def : Pat<(int_ppc_dcbt_with_hint xoaddr:$dst, i32:$TH),
+ (DCBT i32:$TH, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbtst_with_hint xoaddr:$dst, i32:$TH),
+ (DCBTST i32:$TH, xoaddr:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -1969,7 +2071,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
// PPC32 Load Instructions.
//
-// Unindexed (r+i) Loads.
+// Unindexed (r+i) Loads.
let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
@@ -2112,7 +2214,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
}
// Load Multiple
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
"lmw $rD, $src", IIC_LdStLMW, []>;
@@ -2281,10 +2383,15 @@ let isCodeGenOnly = 1 in {
}
}
+// We used to have EIEIO as the name, but E[0-9A-Z] is a reserved name
+def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+ "eieio", IIC_LdStLoad, []>;
+
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_eieio), (EnforceIEIO)>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -2331,6 +2438,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
}
}
+def : InstAlias<"li $rD, $imm", (ADDI gprc:$rD, ZERO, s16imm:$imm)>;
+def : InstAlias<"lis $rD, $imm", (ADDIS gprc:$rD, ZERO, s17imm:$imm)>;
+
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
@@ -2419,6 +2529,14 @@ defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
[(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
+def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR_rec gprc:$rA, gprc:$rB, gprc:$rB)>;
+
+def : InstAlias<"not $rA, $rS", (NOR gprc:$rA, gprc:$rS, gprc:$rS)>;
+def : InstAlias<"not. $rA, $rS", (NOR_rec gprc:$rA, gprc:$rS, gprc:$rS)>;
+
+def : InstAlias<"nop", (ORI R0, R0, 0)>;
+
let PPC970_Unit = 1 in { // FXU Operations.
let hasSideEffects = 0 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
@@ -2465,7 +2583,7 @@ def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
"ftsqrt $crD, $fB", IIC_FPCompare>;
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
@@ -2479,46 +2597,46 @@ let Uses = [RM] in {
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
"frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fpround f64:$frB))]>;
+ [(set f32:$frD, (any_fpround f64:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fround f64:$frB))]>;
+ [(set f64:$frD, (any_fround f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fround f32:$frB))]>;
+ [(set f32:$frD, (any_fround f32:$frB))]>;
}
let hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fceil f64:$frB))]>;
+ [(set f64:$frD, (any_fceil f64:$frB))]>;
defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
"frip", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fceil f32:$frB))]>;
+ [(set f32:$frD, (any_fceil f32:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
"friz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (ftrunc f64:$frB))]>;
+ [(set f64:$frD, (any_ftrunc f64:$frB))]>;
defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
"friz", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (ftrunc f32:$frB))]>;
+ [(set f32:$frD, (any_ftrunc f32:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (ffloor f64:$frB))]>;
+ [(set f64:$frD, (any_ffloor f64:$frB))]>;
defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
"frim", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (ffloor f32:$frB))]>;
+ [(set f32:$frD, (any_ffloor f32:$frB))]>;
defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
"fsqrt", "$frD, $frB", IIC_FPSqrtD,
- [(set f64:$frD, (fsqrt f64:$frB))]>;
+ [(set f64:$frD, (any_fsqrt f64:$frB))]>;
defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
"fsqrts", "$frD, $frB", IIC_FPSqrtS,
- [(set f32:$frD, (fsqrt f32:$frB))]>;
+ [(set f32:$frD, (any_fsqrt f32:$frB))]>;
}
}
}
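// NOTE (annotation, not part of the patch): the switch from fpround/
// fround/fceil/ftrunc/ffloor/fsqrt to their any_* forms lets one pattern
// cover both the normal and the constrained (strict-FP) DAG nodes. These
// selectors are PatFrags from TargetSelectionDAG.td, roughly of the form:
//
//   def any_fsqrt : PatFrags<(ops node:$src),
//                            [(strict_fsqrt node:$src),
//                             (fsqrt node:$src)]>;
//
// Combined with mayRaiseFPException = 1 above, this makes later passes
// treat the instructions conservatively when strict FP semantics are in
// effect.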
@@ -2786,6 +2904,8 @@ def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins),
"mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>;
} // hasSideEffects = 0
+def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>;
+
let Predicates = [HasFPU] in {
// Custom inserter instruction to perform FADD in round-to-zero mode.
let Uses = [RM] in {
@@ -2795,7 +2915,7 @@ let Uses = [RM] in {
// The above pseudo gets expanded to make use of the following instructions
// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
-let Uses = [RM], Defs = [RM] in {
+let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
"mtfsb0 $FM", IIC_IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
@@ -2931,49 +3051,54 @@ defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
}
}
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF_rec gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC gprc:$rA, gprc:$rC, gprc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC_rec gprc:$rA, gprc:$rC, gprc:$rB)>;
+
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
//
let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
let Uses = [RM] in {
let isCommutable = 1 in {
- defm FMADD : AForm_1r<63, 29,
+ defm FMADD : AForm_1r<63, 29,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
defm FMADDS : AForm_1r<59, 29,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
defm FMSUB : AForm_1r<63, 28,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ (any_fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
defm FMSUBS : AForm_1r<59, 28,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ (any_fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
defm FNMADD : AForm_1r<63, 31,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set f64:$FRT,
- (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ (fneg (any_fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
defm FNMADDS : AForm_1r<59, 31,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT,
- (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ (fneg (any_fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
defm FNMSUB : AForm_1r<63, 30,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
"fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
- [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ [(set f64:$FRT, (fneg (any_fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
defm FNMSUBS : AForm_1r<59, 30,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ [(set f32:$FRT, (fneg (any_fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
} // isCommutable
}
@@ -2990,43 +3115,43 @@ defm FSELS : AForm_1r<63, 23,
(outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
"fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
[(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
let isCommutable = 1 in {
defm FADD : AForm_2r<63, 21,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
- [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fadd f64:$FRA, f64:$FRB))]>;
defm FADDS : AForm_2r<59, 21,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fadd f32:$FRA, f32:$FRB))]>;
} // isCommutable
defm FDIV : AForm_2r<63, 18,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
- [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fdiv f64:$FRA, f64:$FRB))]>;
defm FDIVS : AForm_2r<59, 18,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
- [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fdiv f32:$FRA, f32:$FRB))]>;
let isCommutable = 1 in {
defm FMUL : AForm_3r<63, 25,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
"fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
- [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ [(set f64:$FRT, (any_fmul f64:$FRA, f64:$FRC))]>;
defm FMULS : AForm_3r<59, 25,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
"fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
- [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ [(set f32:$FRT, (any_fmul f32:$FRA, f32:$FRC))]>;
} // isCommutable
defm FSUB : AForm_2r<63, 20,
(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
"fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
- [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (any_fsub f64:$FRA, f64:$FRB))]>;
defm FSUBS : AForm_2r<59, 20,
(outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
"fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
- [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ [(set f32:$FRT, (any_fsub f32:$FRA, f32:$FRB))]>;
}
}
@@ -3158,16 +3283,16 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS $in, tblockaddress:$g)>;
// Support for thread-local storage.
-def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
// This uses two output registers, the first as the real output, the second as a
// temporary register, used internally in code generation.
-def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
[]>, NoEncode<"$rT">;
-def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc_nor0:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
"#LDgotTprelL32",
[(set i32:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
@@ -3302,15 +3427,19 @@ def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>;
def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
let Predicates = [HasFPU] in {
-// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
-def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
- (FNMSUB $A, $C, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
- (FNMSUB $A, $C, $B)>;
-def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
- (FNMSUBS $A, $C, $B)>;
-def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
- (FNMSUBS $A, $C, $B)>;
+// Additional fnmsub patterns for the custom PPCfnmsub node
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
+ (FNMSUB $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
+ (FNMSUBS $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+ (FMSUB $A, $B, $C)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+ (FMSUBS $A, $B, $C)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+ (FNMADD $A, $B, $C)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+ (FNMADDS $A, $B, $C)>;
// FCOPYSIGN's operand types need not agree.
def : Pat<(fcopysign f64:$frB, f32:$frA),
@@ -3331,6 +3460,10 @@ def crnot : OutPatFrag<(ops node:$in),
def : Pat<(not i1:$in),
(crnot $in)>;
+// Prefixed instructions may require access to the above defs at a later
+// time so we include this after the def.
+include "PPCInstrPrefix.td"
+
// Patterns for arithmetic i1 operations.
def : Pat<(add i1:$a, i1:$b),
(CRXOR $a, $b)>;
@@ -3510,7 +3643,7 @@ defm : ExtSetCCPat<SETEQ,
(RLWINM (CNTLZW $in), 27, 31, 31)>,
OutPatFrag<(ops node:$in),
(RLDICL (CNTLZD $in), 58, 63)> >;
-
+
defm : ExtSetCCPat<SETNE,
PatFrag<(ops node:$in, node:$cc),
(setcc $in, 0, $cc)>,
@@ -3518,7 +3651,7 @@ defm : ExtSetCCPat<SETNE,
(RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
OutPatFrag<(ops node:$in),
(RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
-
+
defm : ExtSetCCPat<SETLT,
PatFrag<(ops node:$in, node:$cc),
(setcc $in, 0, $cc)>,
@@ -4128,10 +4261,6 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", IIC_LdStICBI, []>;
-// We used to have EIEIO as value but E[0-9A-Z] is a reserved name
-def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
- "eieio", IIC_LdStLoad, []>;
-
def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L),
"wait $L", IIC_LdStLoad, []>;
@@ -4421,17 +4550,53 @@ def DCBFx : PPCAsmPseudo<"dcbf $dst", (ins memrr:$dst)>;
def DCBFL : PPCAsmPseudo<"dcbfl $dst", (ins memrr:$dst)>;
def DCBFLP : PPCAsmPseudo<"dcbflp $dst", (ins memrr:$dst)>;
+def : Pat<(int_ppc_isync), (ISYNC)>;
+def : Pat<(int_ppc_dcbfl xoaddr:$dst),
+ (DCBF 1, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbflp xoaddr:$dst),
+ (DCBF 3, xoaddr:$dst)>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+
+def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
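// NOTE (annotation, not part of the patch): the foreach above expands to
// one mfbr/mtbr alias pair per branch register, with the device control
// register number computed at TableGen time by !add. For BR = 0 it is
// equivalent to writing:
//
//   def : InstAlias<"mfbr0 $Rx", (MFDCR gprc:$Rx, 0x80)>,
//         Requires<[IsPPC4xx]>;
//   def : InstAlias<"mtbr0 $Rx", (MTDCR gprc:$Rx, 0x80)>,
//         Requires<[IsPPC4xx]>;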
+
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mtudscr $Rx", (MTSPR 3, gprc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFSPR gprc:$Rx, 3)>;
+
def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+def : InstAlias<"mtlr $Rx", (MTSPR 8, gprc:$Rx)>;
+def : InstAlias<"mflr $Rx", (MFSPR gprc:$Rx, 8)>;
+
+def : InstAlias<"mtctr $Rx", (MTSPR 9, gprc:$Rx)>;
+def : InstAlias<"mfctr $Rx", (MFSPR gprc:$Rx, 9)>;
+
+def : InstAlias<"mtuamr $Rx", (MTSPR 13, gprc:$Rx)>;
+def : InstAlias<"mfuamr $Rx", (MFSPR gprc:$Rx, 13)>;
+
def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
@@ -4453,12 +4618,6 @@ def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
-def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
-
def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
@@ -4468,27 +4627,34 @@ def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
-def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
-def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
-def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
-
-def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
-def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
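// NOTE (annotation, not part of the patch): SPRG4-7 are BookE-only and
// live at SPR numbers 260-263 (!add(SPRG, 256)), while SPRG0-3 below sit
// at SPRs 272-275 (!add(SPRG, 272)) and need no predicate. Expansion
// sketch for SPRG = 4:
//
//   def : InstAlias<"mfsprg4 $RT", (MFSPR gprc:$RT, 260)>,
//         Requires<[IsBookE]>;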
-def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
-def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
-def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
-def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
-def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
-def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
foreach BATR = 0-3 in {
def : InstAlias<"mtdbatu "#BATR#", $Rx",
@@ -4517,86 +4683,36 @@ foreach BATR = 0-3 in {
Requires<[IsPPC6xx]>;
}
-foreach BR = 0-7 in {
- def : InstAlias<"mfbr"#BR#" $Rx",
- (MFDCR gprc:$Rx, !add(BR, 0x80))>,
- Requires<[IsPPC4xx]>;
- def : InstAlias<"mtbr"#BR#" $Rx",
- (MTDCR gprc:$Rx, !add(BR, 0x80))>,
- Requires<[IsPPC4xx]>;
-}
-
-def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
-
-def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mtppr $RT", (MTSPR 896, gprc:$RT)>;
+def : InstAlias<"mfppr $RT", (MFSPR gprc:$RT, 896)>;
def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
-def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
-def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
-
-def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
- (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
-
-def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
-
-def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
-def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
-
-def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
-def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-foreach SPRG = 0-3 in {
- def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
- def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
- def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
- def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
-}
-foreach SPRG = 4-7 in {
- def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
- Requires<[IsBookE]>;
- def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
- Requires<[IsBookE]>;
- def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
- Requires<[IsBookE]>;
- def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
- Requires<[IsBookE]>;
-}
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
-def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
-def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
-def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
-def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
-def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
-def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
@@ -4609,6 +4725,17 @@ def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
Requires<[IsPPC4xx]>;
+def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
+
+def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWI_rec : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -4646,6 +4773,13 @@ def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
def CLRLSLWI_rec : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+def : InstAlias<"isellt $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0LT)>;
+def : InstAlias<"iselgt $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0GT)>;
+def : InstAlias<"iseleq $rT, $rA, $rB",
+ (ISEL gprc:$rT, gprc_nor0:$rA, gprc:$rB, CR0EQ)>;
+
def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
@@ -4694,6 +4828,8 @@ def CLRLSLDI_rec : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>;
def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi $rA, $rS, $n",
+ (RLDICL_32_64 g8rc:$rA, gprc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCL_rec g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
@@ -4892,6 +5028,8 @@ def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"trap", (TW 31, R0, R0)>;
+
multiclass TrapExtendedMnemonic<string name, int to> {
def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
@@ -5025,8 +5163,11 @@ def RotateInsertByte1 {
dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31);
}
-def : Pat<(i32 (bitreverse i32:$A)),
- (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+// Clear the upper half of the register when in 64-bit mode
+let Predicates = [In64BitMode] in
+def : Pat<(i32 (bitreverse i32:$A)), (RLDICL_32 RotateInsertByte1.Left, 0, 32)>;
+let Predicates = [In32BitMode] in
+def : Pat<(i32 (bitreverse i32:$A)), RotateInsertByte1.Left>;
// Fast 64-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
new file mode 100644
index 0000000000000..2bab73418e10d
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -0,0 +1,1035 @@
+//===----------------------------------------------------------------------===//
+// PowerPC ISA 3.1 specific type constraints.
+//
+
+def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
+ SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ISA 3.1 specific PPCISD nodes.
+//
+
+def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
+
+//===----------------------------------------------------------------------===//
+
+// PC Relative flag (for instructions that use the address of the prefix for
+// address computations).
+class isPCRel { bit PCRel = 1; }
+
+// Top-level class for prefixed instructions.
+class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : Instruction {
+ field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ bit PCRel = 0; // Default value, set by isPCRel.
+ let Size = 8;
+
+ let Namespace = "PPC";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+ let Inst{0-5} = pref;
+ let Inst{32-37} = opcode;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ bits<1> Prefixed = 1; // This is a prefixed instruction.
+ let TSFlags{7} = Prefixed;
+
+ // For cases where multiple instruction definitions really represent the
+ // same underlying instruction but with one definition for 64-bit arguments
+ // and one for 32-bit arguments, this bit breaks the degeneracy between
+ // the two forms and allows TableGen to generate mapping tables.
+ bit Interpretation64Bit = 0;
+
+ // Fields used for relation models.
+ string BaseName = "";
+}
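// NOTE (annotation, not part of the patch): Inst is 64 bits wide and
// Size is 8 because an ISA 3.1 prefixed instruction is a 4-byte prefix
// word followed by a 4-byte suffix word. PI fills in the prefix primary
// opcode (bits 0-5, always 1) and the suffix primary opcode (bits
// 32-37); each derived format only has to describe the remaining payload
// bits. A hypothetical minimal subclass would look like:
//
//   class PI_Sketch<bits<6> opcode, dag OOL, dag IOL, string asmstr>
//     : PI<1, opcode, OOL, IOL, asmstr, IIC_IntSimple> {
//     bits<5> RT;
//     let Inst{38-42} = RT; // payload lives in the suffix word
//   }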
+
+class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRS;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{38-42} = FRS{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
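// NOTE (annotation, not part of the patch): D_RA is a composite memory
// operand. Bits 38-34 hold the 5-bit base register RA and bits 33-0 the
// signed 34-bit displacement, which the format splits into
// d0 = D_RA{33-16} (prefix word) and d1 = D_RA{15-0} (suffix word); the
// hardware reconstitutes the displacement as (d0 << 16) | d1 and
// sign-extends from bit 33.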
+
+class MLS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<34> SI;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = SI{33-16};
+
+ // The instruction.
+ let Inst{38-42} = RT;
+ let Inst{43-47} = RA;
+ let Inst{48-63} = SI{15-0};
+}
+
+class MLS_DForm_SI34_RT5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<34> SI;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 2;
+ let Inst{8-10} = 0;
+ let Inst{11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = SI{33-16};
+
+ // The instruction.
+ let Inst{38-42} = RT;
+ let Inst{43-47} = 0;
+ let Inst{48-63} = SI{15-0};
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : MLS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : MLS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+}
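// NOTE (annotation, not part of the patch): each *_p multiclass emits
// two records per defm: a base form with R = 0 and a "pc" form with
// R = 1 (via isPCRel), encoding the R bit in the printed asm by
// appending ", 0" or ", 1". So "defm PADDI : ..." below produces,
// roughly:
//
//   PADDI     "paddi $RT, $RA, $SI, 0"   // absolute
//   PADDIpc   "paddi $RT, $RA, $SI, 1"   // relative to the instruction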
+
+class 8LS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-10} = 0;
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{38-42} = RT{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+// 8LS:D-Form: [ 1 0 0 // R // d0
+// PO TX T RA d1 ]
+class 8LS_DForm_R_SI34_XT6_RA5<bits<5> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<39> D_RA;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 0;
+ let Inst{8} = 0;
+ let Inst{9-10} = 0; // reserved
+ let Inst{11} = PCRel;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-31} = D_RA{33-16}; // d0
+
+ // The instruction.
+ let Inst{37} = XT{5};
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = D_RA{38-34}; // RA
+ let Inst{48-63} = D_RA{15-0}; // d1
+}
+
+// X-Form: [PO T IMM VRB XO TX]
+class XForm_XT6_IMM5_VB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> VRB;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = VRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+ bits<8> IMM;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8} = 0;
+ let Inst{9-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-23} = 0;
+ let Inst{24-31} = IMM;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RD;
+ bits<5> VB;
+ bits<3> N;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RD;
+ let Inst{11-12} = 0;
+ let Inst{13-15} = N;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+
+// VX-Form: [PO VRT / UIM RB XO].
+// We use VXForm_1 to implement it; that is, we use "VRA" (5-bit) to represent
+// "/ UIM" (an unused bit followed by a 4-bit immediate).
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VRT5_UIM5_RB5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
+ !strconcat(opc, " $vD, $rB, $UIM"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VX-Form: [PO VRT RA VRB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, vrrc:$vB),
+ !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VX-Form: [PO VRT RA RB XO].
+// Destructive (insert) forms are suffixed with _ins.
+class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
+ !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+
+// VN-Form: [PO VRT VRA VRB PS SD XO]
+// SD is "Shift Direction"
+class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VRT;
+ bits<5> VRA;
+ bits<5> VRB;
+ bits<3> SD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VRT;
+ let Inst{11-15} = VRA;
+ let Inst{16-20} = VRB;
+ let Inst{21-22} = ps;
+ let Inst{23-25} = SD;
+ let Inst{26-31} = xo;
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+// PO T XO TX imm1 ].
+class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<32> IMM32;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-15} = 0; // reserved
+ let Inst{16-31} = IMM32{31-16};
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-46} = xo;
+ let Inst{47} = XT{5};
+ let Inst{48-63} = IMM32{15-0};
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+// PO T XO IX TX imm1 ].
+class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bit IX;
+ bits<32> IMM32;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0; // reserved
+ let Inst{14-15} = 0; // reserved
+ let Inst{16-31} = IMM32{31-16};
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-45} = xo;
+ let Inst{46} = IX;
+ let Inst{47} = XT{5};
+ let Inst{48-63} = IMM32{15-0};
+}
+
+class 8RR_XX4Form_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-31} = 0;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+class 8RR_XX4Form_IMM3_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : PI<1, opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+ bits<3> IMM;
+
+ let Pattern = pattern;
+
+ // The prefix.
+ let Inst{6-7} = 1;
+ let Inst{8-11} = 0;
+ let Inst{12-13} = 0;
+ let Inst{14-28} = 0;
+ let Inst{29-31} = IMM;
+
+ // The instruction.
+ let Inst{38-42} = XT{4-0};
+ let Inst{43-47} = XA{4-0};
+ let Inst{48-52} = XB{4-0};
+ let Inst{53-57} = XC{4-0};
+ let Inst{58-59} = xo;
+ let Inst{60} = XC{5};
+ let Inst{61} = XA{5};
+ let Inst{62} = XB{5};
+ let Inst{63} = XT{5};
+}
+
+// [PO BF / XO2 B XO BX /]
+class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
+ dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = xo2;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : MLS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+}
+
+multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
+}
+
+def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
+def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+
+let Predicates = [PrefixInstrs] in {
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PADDI8 :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
+ }
+ defm PADDI :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
+
+ let mayLoad = 1, mayStore = 0 in {
+ defm PLXV :
+ 8LS_DForm_R_SI34_XT6_RA5_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxv $XT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLXSSP :
+ 8LS_DForm_R_SI34_RTA5_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxssp $VRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLXSD :
+ 8LS_DForm_R_SI34_RTA5_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plxsd $VRT, $D_RA",
+ IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PLBZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA8 :
+ 8LS_DForm_R_SI34_RTA5_p<41, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
+ defm PLBZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA :
+ 8LS_DForm_R_SI34_RTA5_p<41, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLD :
+ 8LS_DForm_R_SI34_RTA5_p<57, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
+
+ let mayStore = 1, mayLoad = 0 in {
+ defm PSTXV :
+ 8LS_DForm_R_SI34_XT6_RA5_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA),
+ (ins vsrc:$XS, memri34_pcrel:$D_RA),
+ "pstxv $XS, $D_RA", IIC_LdStLFD>;
+ defm PSTFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA),
+ (ins f4rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfs $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA),
+ (ins f8rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfd $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSSP :
+ 8LS_DForm_R_SI34_RTA5_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxssp $VRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSD :
+ 8LS_DForm_R_SI34_RTA5_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxsd $VRS, $D_RA", IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PSTB8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ }
+ defm PSTB :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTD :
+ 8LS_DForm_R_SI34_RTA5_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstd $RS, $D_RA", IIC_LdStLFD>;
+ }
+}
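// NOTE (annotation, not part of the patch): pli/paddi carry a full
// signed 34-bit immediate, so a constant such as 4886718345 that
// previously required a multi-instruction lis/ori/shift sequence can be
// materialized by one 8-byte instruction, which is why PLI/PLI8 above
// are marked isReMaterializable and isAsCheapAsAMove:
//
//   pli 3, 4886718345    ; single prefixed instruction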
+
+// TODO: We have an added complexity of 500 here. This is only a temporary
+// solution to have tablegen consider these patterns first. The way we do
+// addressing for PowerPC is complex depending on available D form, X form, or
+// aligned D form loads/stores like DS and DQ forms. The prefixed
+// instructions in this file also add additional PC Relative loads/stores
+// and D form loads/stores with 34-bit immediates. It is very difficult to force
+// instruction selection to consistently pick these first without the current
+// added complexity. Once the pc-relative implementation is complete, a set of
+// follow-up patches will address this refactoring and remove the
+// AddedComplexity.
+let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+ // Load i32
+ def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHApc $ga, 0)>;
+ def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+
+ // Store i32
+ def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTBpc $RS, $ga, 0)>;
+ def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTHpc $RS, $ga, 0)>;
+ def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTWpc $RS, $ga, 0)>;
+
+ // Load i64
+ def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZ8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLBZ8pc $ga, 0)>;
+ def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHA8pc $ga, 0)>;
+ def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZ8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLHZ8pc $ga, 0)>;
+ def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWZ8pc $ga, 0)>;
+ def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWA8pc $ga, 0)>;
+ def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ (PLWZ8pc $ga, 0)>;
+ def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+
+ // Store i64
+ def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTB8pc $RS, $ga, 0)>;
+ def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTH8pc $RS, $ga, 0)>;
+ def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTW8pc $RS, $ga, 0)>;
+ def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTDpc $RS, $ga, 0)>;
+
+ // Load f32
+ def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+
+ // Store f32
+ def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTFSpc $FRS, $ga, 0)>;
+
+ // Load f64
+ def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+ (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
+ def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+
+ // Store f64
+ def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTFDpc $FRS, $ga, 0)>;
+
+ // Load f128
+ def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+ (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
+
+ // Store f128
+ def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
+
+ // Load v4i32
+ def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v4i32
+ def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v2i64
+ def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v2i64
+ def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v4f32
+ def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v4f32
+ def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Load v2f64
+ def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+
+ // Store v2f64
+ def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ (PSTXVpc $XS, $ga, 0)>;
+
+ // Atomic Load
+ def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLBZpc $ga, 0)>;
+ def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLHZpc $ga, 0)>;
+ def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLWZpc $ga, 0)>;
+ def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+ (PLDpc $ga, 0)>;
+
+ // Atomic Store
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTBpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTHpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ (PSTWpc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTB8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTH8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTW8pc $RS, $ga, 0)>;
+ def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ (PSTDpc $RS, $ga, 0)>;
+
+ // Special Cases For PPCstore_scal_int_from_vsr
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
+
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
+ (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
+
+ // If the PPCmatpcreladdr node is not caught by any other pattern it should be
+ // caught here and turned into a paddi instruction to materialize the address.
+ def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+}
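// NOTE (annotation, not part of the patch): the final pattern is the
// catch-all. When no load/store pattern swallows the PPCmatpcreladdr
// node (for instance, when the address itself escapes into a call or a
// comparison), ISel materializes the address with the pc-relative form
// of paddi, e.g.:
//
//   paddi 3, 0, symbol@PCREL, 1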
+
+let Predicates = [PrefixInstrs] in {
+ def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltiw $XT, $IMM32", IIC_VecGeneral,
+ []>;
+ def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltidp $XT, $IMM32", IIC_VecGeneral,
+ [(set v2f64:$XT,
+ (PPCxxspltidp i32:$IMM32))]>;
+ def XXSPLTI32DX :
+ 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+ (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
+ "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
+ [(set v2i64:$XT,
+ (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
+ i32:$IMM32))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XXPERMX :
+ 8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC, u3imm:$UIM),
+ "xxpermx $XT, $XA, $XB, $XC, $UIM",
+ IIC_VecPerm, []>;
+ def XXBLENDVB :
+ 8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvb $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVH :
+ 8RR_XX4Form_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvh $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVW :
+ 8RR_XX4Form_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvw $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+ def XXBLENDVD :
+ 8RR_XX4Form_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC), "xxblendvd $XT, $XA, $XB, $XC",
+ IIC_VecGeneral, []>;
+}
+
+let Predicates = [IsISA3_1] in {
+ def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
+ "vsldbi $VRT, $VRA, $VRB, $SH",
+ IIC_VecGeneral,
+ [(set v16i8:$VRT,
+ (int_ppc_altivec_vsldbi v16i8:$VRA,
+ v16i8:$VRB,
+ i32:$SH))]>;
+ def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT),
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
+ "vsrdbi $VRT, $VRA, $VRB, $SH",
+ IIC_VecGeneral,
+ [(set v16i8:$VRT,
+ (int_ppc_altivec_vsrdbi v16i8:$VRA,
+ v16i8:$VRB,
+ i32:$SH))]>;
+ def VINSW :
+ VXForm_VRT5_UIM5_RB5_ins<207, "vinsw",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinsw v4i32:$vDi, i64:$rB,
+ timm:$UIM))]>;
+ def VINSD :
+ VXForm_VRT5_UIM5_RB5_ins<463, "vinsd",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB,
+ timm:$UIM))]>;
+ def VINSBVLX :
+ VXForm_VTB5_RA5_ins<15, "vinsbvlx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbvlx v16i8:$vDi, i64:$rA,
+ v16i8:$vB))]>;
+ def VINSBVRX :
+ VXForm_VTB5_RA5_ins<271, "vinsbvrx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbvrx v16i8:$vDi, i64:$rA,
+ v16i8:$vB))]>;
+ def VINSHVLX :
+ VXForm_VTB5_RA5_ins<79, "vinshvlx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshvlx v8i16:$vDi, i64:$rA,
+ v8i16:$vB))]>;
+ def VINSHVRX :
+ VXForm_VTB5_RA5_ins<335, "vinshvrx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshvrx v8i16:$vDi, i64:$rA,
+ v8i16:$vB))]>;
+ def VINSWVLX :
+ VXForm_VTB5_RA5_ins<143, "vinswvlx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswvlx v4i32:$vDi, i64:$rA,
+ v4i32:$vB))]>;
+ def VINSWVRX :
+ VXForm_VTB5_RA5_ins<399, "vinswvrx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswvrx v4i32:$vDi, i64:$rA,
+ v4i32:$vB))]>;
+ def VINSBLX :
+ VXForm_VRT5_RAB5_ins<527, "vinsblx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsblx v16i8:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSBRX :
+ VXForm_VRT5_RAB5_ins<783, "vinsbrx",
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vinsbrx v16i8:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSHLX :
+ VXForm_VRT5_RAB5_ins<591, "vinshlx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshlx v8i16:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSHRX :
+ VXForm_VRT5_RAB5_ins<847, "vinshrx",
+ [(set v8i16:$vD,
+ (int_ppc_altivec_vinshrx v8i16:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSWLX :
+ VXForm_VRT5_RAB5_ins<655, "vinswlx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswlx v4i32:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSWRX :
+ VXForm_VRT5_RAB5_ins<911, "vinswrx",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vinswrx v4i32:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSDLX :
+ VXForm_VRT5_RAB5_ins<719, "vinsdlx",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA,
+ i64:$rB))]>;
+ def VINSDRX :
+ VXForm_VRT5_RAB5_ins<975, "vinsdrx",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA,
+ i64:$rB))]>;
+
+ def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>;
+ def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpextd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>;
+ def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "pdepd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>;
+ def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "pextd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
+ def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vcfuged $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>;
+ def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N),
+ "vgnb $rD, $vB, $N", IIC_VecGeneral,
+ [(set i64:$rD,
+ (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>;
+ def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cfuged $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>;
+ def XXEVAL :
+ 8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+ vsrc:$XC, u8imm:$IMM),
+ "xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral,
+ [(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA,
+ v2i64:$XB, v2i64:$XC, timm:$IMM))]>;
+ def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vclzdm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>;
+ def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vctzdm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>;
+ def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cntlzdm $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA,
+ (int_ppc_cntlzdm i64:$rS, i64:$rB))]>;
+ def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "cnttzdm $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA,
+ (int_ppc_cnttzdm i64:$rS, i64:$rB))]>;
+ def XXGENPCVBM :
+ XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVHM :
+ XForm_XT6_IMM5_VB5<60, 917, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvhm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVWM :
+ XForm_XT6_IMM5_VB5<60, 948, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvwm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def XXGENPCVDM :
+ XForm_XT6_IMM5_VB5<60, 949, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
+ "xxgenpcvdm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
+ def VCLRLB : VXForm_1<397, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
+ "vclrlb $vD, $vA, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vclrlb v16i8:$vA, i32:$rB))]>;
+ def VCLRRB : VXForm_1<461, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
+ "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
+ [(set v16i8:$vD,
+ (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
+
+ def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
+ "xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
+
+ // The XFormMemOp flag for the following 8 instructions is set in
+ // the instruction format class itself rather than on each def.
+ let mayLoad = 1, mayStore = 0 in {
+ def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;
+ def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>;
+ def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>;
+ def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>;
+ }
+
+ let mayLoad = 0, mayStore = 1 in {
+ def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>;
+ def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>;
+ def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
+ def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
+ }
+}
+
+//---------------------------- Anonymous Patterns ----------------------------//
+let Predicates = [IsISA3_1] in {
+ def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
+ (v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
+ (v8i16 (COPY_TO_REGCLASS (XXGENPCVHM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v4i32 (int_ppc_vsx_xxgenpcvwm v4i32:$VRB, imm:$IMM)),
+ (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, -1)),
+ (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
+ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
+ (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+}
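// NOTE (annotation, not part of the patch): xvtlsbb writes a CR field,
// so the xvtlsbb intrinsic patterns read a single bit of it back with
// EXTRACT_SUBREG: sub_lt (the LT bit, all byte LSBs set) when the second
// intrinsic operand is -1, and sub_eq (the EQ bit, no byte LSBs set)
// when it is 0.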
+
+let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
+ def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A,
+ i32immNonAllOneNonZero:$A)),
+ (v4i32 (XXSPLTIW imm:$A))>;
+ def : Pat<(f32 nzFPImmAsi32:$A),
+ (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+ VSFRC)>;
+ def : Pat<(f64 nzFPImmAsi32:$A),
+ (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
+ VSFRC)>;
+}
+
+let Predicates = [PrefixInstrs] in {
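+ // The byte and halfword intrinsics below take Altivec-class (VRRC) operands,
+ // so the patterns copy them to VSRC for the VSX instruction and copy the
+ // result back; the word and doubleword forms match without copies.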
+ def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)),
+ (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>;
+ def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)),
+ (COPY_TO_REGCLASS
+ (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+ def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)),
+ (COPY_TO_REGCLASS
+ (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+ def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C),
+ (XXBLENDVW $A, $B, $C)>;
+ def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C),
+ (XXBLENDVD $A, $B, $C)>;
+}
+
diff --git a/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index d67041d46d9f0..2265af2815cb5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,9 @@
//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the QPX extension to the PowerPC instruction set.
@@ -101,7 +101,7 @@ let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+def HasQPX : Predicate<"Subtarget->hasQPX()">;
let Predicates = [HasQPX] in {
let DecoderNamespace = "QPX" in {
let hasSideEffects = 0 in { // QPX instructions don't have side effects.
@@ -167,48 +167,48 @@ let Uses = [RM] in {
// Multiply-add instructions
def QVFMADD : AForm_1<4, 29,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
let isCodeGenOnly = 1 in
def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
def QVFMADDSs : AForm_1<0, 29,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
def QVFNMADD : AForm_1<4, 31,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
v4f64:$FRB)))]>;
let isCodeGenOnly = 1 in
def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
def QVFNMADDSs : AForm_1<0, 31,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
v4f32:$FRB)))]>;
def QVFMSUB : AForm_1<4, 28,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
(fneg v4f64:$FRB)))]>;
let isCodeGenOnly = 1 in
def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
def QVFMSUBSs : AForm_1<0, 28,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
(fneg v4f32:$FRB)))]>;
def QVFNMSUB : AForm_1<4, 30,
- (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC, qfrc:$FRB),
"qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
(fneg v4f64:$FRB))))]>;
let isCodeGenOnly = 1 in
def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
def QVFNMSUBSs : AForm_1<0, 30,
- (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC, qsrc:$FRB),
"qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
[(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
(fneg v4f32:$FRB))))]>;
@@ -899,13 +899,13 @@ def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
- (QVFNMSUB $A, $B, $C)>;
+ (QVFNMSUB $A, $C, $B)>;
def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
- (QVFNMSUB $A, $B, $C)>;
+ (QVFNMSUB $A, $C, $B)>;
def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
- (QVFNMSUBSs $A, $B, $C)>;
+ (QVFNMSUBSs $A, $C, $B)>;
def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
- (QVFNMSUBSs $A, $B, $C)>;
+ (QVFNMSUBSs $A, $C, $B)>;
def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
(QVFMADD $A, $B, $C)>;
@@ -1210,4 +1210,3 @@ def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
(QVFTSTNANbs $FRB, $FRB), (i32 7)),
$FRB, $FRA)>;
}
-
diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 935c3044ae470..858eb0c9fe500 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -158,7 +158,7 @@ def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB),
def EFDCFSI : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB),
"efdcfsi $RT, $RB", IIC_FPDGeneral,
- [(set f64:$RT, (sint_to_fp i32:$RB))]>;
+ [(set f64:$RT, (any_sint_to_fp i32:$RB))]>;
def EFDCFSID : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB),
"efdcfsid $RT, $RB", IIC_FPDGeneral,
@@ -169,7 +169,7 @@ def EFDCFUF : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB),
def EFDCFUI : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB),
"efdcfui $RT, $RB", IIC_FPDGeneral,
- [(set f64:$RT, (uint_to_fp i32:$RB))]>;
+ [(set f64:$RT, (any_uint_to_fp i32:$RB))]>;
def EFDCFUID : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB),
"efdcfuid $RT, $RB", IIC_FPDGeneral,
@@ -197,7 +197,7 @@ def EFDCTSIDZ : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB),
def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB),
"efdctsiz $RT, $RB", IIC_FPDGeneral,
- [(set i32:$RT, (fp_to_sint f64:$RB))]>;
+ [(set i32:$RT, (any_fp_to_sint f64:$RB))]>;
def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB),
"efdctuf $RT, $RB", IIC_FPDGeneral, []>;
@@ -212,7 +212,7 @@ def EFDCTUIDZ : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB),
def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB),
"efdctuiz $RT, $RB", IIC_FPDGeneral,
- [(set i32:$RT, (fp_to_uint f64:$RB))]>;
+ [(set i32:$RT, (any_fp_to_uint f64:$RB))]>;
def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"efddiv $RT, $RA, $RB", IIC_FPDivD,
@@ -261,14 +261,14 @@ def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB),
def EFSCFSI : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB),
"efscfsi $RT, $RB", IIC_FPSGeneral,
- [(set f32:$RT, (sint_to_fp i32:$RB))]>;
+ [(set f32:$RT, (any_sint_to_fp i32:$RB))]>;
def EFSCFUF : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB),
"efscfuf $RT, $RB", IIC_FPSGeneral, []>;
def EFSCFUI : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB),
"efscfui $RT, $RB", IIC_FPSGeneral,
- [(set f32:$RT, (uint_to_fp i32:$RB))]>;
+ [(set f32:$RT, (any_uint_to_fp i32:$RB))]>;
let isCompare = 1 in {
def EFSCMPEQ : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB),
@@ -288,7 +288,7 @@ def EFSCTSI : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB),
def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB),
"efsctsiz $RT, $RB", IIC_FPSGeneral,
- [(set i32:$RT, (fp_to_sint f32:$RB))]>;
+ [(set i32:$RT, (any_fp_to_sint f32:$RB))]>;
def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB),
"efsctuf $RT, $RB", IIC_FPSGeneral, []>;
@@ -299,7 +299,7 @@ def EFSCTUI : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB),
def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB),
"efsctuiz $RT, $RB", IIC_FPSGeneral,
- [(set i32:$RT, (fp_to_uint f32:$RB))]>;
+ [(set i32:$RT, (any_fp_to_uint f32:$RB))]>;
def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB),
"efsdiv $RT, $RA, $RB", IIC_FPDivD,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index be6b30ffa08b7..9ba5058a6f812 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,9 @@
//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
-//
+//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the VSX extension to the PowerPC instruction set.
@@ -25,6 +25,32 @@
// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
// ****************************************************************************
+// *********************************** NOTE ***********************************
+// ** When adding new anonymous patterns to this file, please add them to **
+// ** the section titled Anonymous Patterns. Chances are that the existing **
+// ** predicate blocks already contain a combination of features that you **
+// ** are after. There is a list of blocks at the top of the section. If **
+// ** you definitely need a new combination of predicates, please add that **
+// ** combination to the list. **
+// ** File Structure: **
+// ** - Custom PPCISD node definitions **
+// ** - Predicate definitions: predicates to specify the subtargets for **
+// ** which an instruction or pattern can be emitted. **
+// ** - Instruction formats: classes instantiated by the instructions. **
+// ** These generally correspond to instruction formats in section 1.6 of **
+// ** the ISA document. **
+// ** - Instruction definitions: the actual definitions of the instructions **
+// ** often including input patterns that they match. **
+// ** - Helper DAG definitions: We define a number of dag objects to use as **
+// **   input or output patterns for conciseness of the code.              **
+// ** - Anonymous patterns: input patterns that an instruction matches can **
+// ** often not be specified as part of the instruction definition, so an **
+// ** anonymous pattern must be specified mapping an input pattern to an **
+// ** output pattern. These are generally guarded by subtarget predicates. **
+// ** - Instruction aliases: used to define extended mnemonics for assembly **
+// ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). **
+// ****************************************************************************
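+// For example (illustrative only), a new anonymous pattern guarded by an
+// existing predicate combination would be added to the matching block in the
+// Anonymous Patterns section:
+//   let Predicates = [HasVSX] in
+//   def : Pat<(v4i32 (vnot_ppc v4i32:$A)), (v4i32 (XXLNOR $A, $A))>;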
+
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
@@ -89,6 +115,7 @@ def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+//--------------------------- Custom PPC nodes -------------------------------//
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
@@ -111,7 +138,24 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
+def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
+ SDTypeProfile<1, 1, []>, []>;
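+// PPCSToV represents a scalar_to_vector whose result element may end up in a
+// permuted lane; the ScalToVecWPermute multiclass below picks between the
+// permuted and non-permuted output patterns.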
+
+//-------------------------- Predicate definitions ---------------------------//
+def HasVSX : Predicate<"Subtarget->hasVSX()">;
+def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
+def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
+def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
+def NoP9Vector : Predicate<"!Subtarget->hasP9Vector()">;
+def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
+def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
+
+//--------------------- VSX-specific instruction formats ---------------------//
+// By default, all VSX instructions are to be selected over their Altivec
+// counterparts, and they do not have unmodeled side effects.
+let AddedComplexity = 400, hasSideEffects = 0 in {
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {
@@ -144,14 +188,119 @@ class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let XB = XA;
}
-def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
-def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
-def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
-def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
+let Predicates = [HasVSX, HasP9Vector] in {
+class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT XO VRB XO /], but only the left 64 bits (or fewer) of VRB are used,
+// so we use a different operand class for VRB.
+class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ RegisterOperand vbtype, list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO T XO B XO BX /]
+class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ list<dag> pattern>
+ : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
+ !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-let Predicates = [HasVSX] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let hasSideEffects = 0 in { // VSX instructions don't have side effects.
+// [PO T XO B XO BX TX]
+class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+ !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+// [PO T A B XO AX BX TX]; the source and destination registers use
+// different operand classes.
+class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+ RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+ !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
+
+class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+ list<dag> pattern>
+ : Z23Form_8<opcode, xo,
+ (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+ !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+ let RC = ex;
+}
+
+// [PO BF // VRA VRB XO /]
+class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+ let Pattern = pattern;
+}
+
+// [PO T RA RB XO TX] is almost identical to [PO S RA RB XO SX], but has
+// different "out" and "in" dags.
+class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+
+// [PO S RA RB XO SX]
+class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+} // Predicates = HasP9Vector
+} // AddedComplexity = 400, hasSideEffects = 0
+
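+// Instantiations of ScalToVecWPermute supply a vector type, a scalar input
+// dag, and two outputs: NonPermOut is emitted for a plain scalar_to_vector
+// and PermOut for the permuted (PPCSToV) form.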
+multiclass ScalToVecWPermute<ValueType Ty, dag In, dag NonPermOut, dag PermOut> {
+ def : Pat<(Ty (scalar_to_vector In)), (Ty NonPermOut)>;
+ def : Pat<(Ty (PPCSToV In)), (Ty PermOut)>;
+}
+
+//-------------------------- Instruction definitions -------------------------//
+// VSX instructions require the VSX feature, they are to be selected over
+// equivalent Altivec patterns (as they address a larger register set) and
+// they do not have unmodeled side effects.
+let Predicates = [HasVSX], AddedComplexity = 400 in {
+let hasSideEffects = 0 in {
// Load indexed instructions
let mayLoad = 1, mayStore = 0 in {
@@ -213,53 +362,53 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
}
} // mayStore
- let Uses = [RM] in {
+ let Uses = [RM], mayRaiseFPException = 1 in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsadddp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fadd f64:$XA, f64:$XB))]>;
def XSMULDP : XX3Form<60, 48,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsmuldp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fmul f64:$XA, f64:$XB))]>;
def XVADDDP : XX3Form<60, 96,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvadddp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fadd v2f64:$XA, v2f64:$XB))]>;
def XVADDSP : XX3Form<60, 64,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvaddsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fadd v4f32:$XA, v4f32:$XB))]>;
def XVMULDP : XX3Form<60, 112,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmuldp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fmul v2f64:$XA, v2f64:$XB))]>;
def XVMULSP : XX3Form<60, 80,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvmulsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fmul v4f32:$XA, v4f32:$XB))]>;
}
// Subtract Instructions
def XSSUBDP : XX3Form<60, 40,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xssubdp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fsub f64:$XA, f64:$XB))]>;
def XVSUBDP : XX3Form<60, 104,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubdp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fsub v2f64:$XA, v2f64:$XB))]>;
def XVSUBSP : XX3Form<60, 72,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvsubsp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fsub v4f32:$XA, v4f32:$XB))]>;
// FMA Instructions
let BaseName = "XSMADDADP" in {
@@ -267,7 +416,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSMADDADP : XX3Form<60, 33,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -283,7 +432,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSMSUBADP : XX3Form<60, 49,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -299,7 +448,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSNMADDADP : XX3Form<60, 161,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -315,7 +464,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSNMSUBADP : XX3Form<60, 177,
(outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -331,7 +480,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMADDADP : XX3Form<60, 97,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -347,7 +496,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMADDASP : XX3Form<60, 65,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -363,7 +512,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMSUBADP : XX3Form<60, 113,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -379,7 +528,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVMSUBASP : XX3Form<60, 81,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -395,7 +544,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMADDADP : XX3Form<60, 225,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -427,7 +576,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMSUBADP : XX3Form<60, 241,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -443,7 +592,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVNMSUBASP : XX3Form<60, 209,
(outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
"xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
@@ -458,11 +607,11 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSDIVDP : XX3Form<60, 56,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsdivdp $XT, $XA, $XB", IIC_FPDivD,
- [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ [(set f64:$XT, (any_fdiv f64:$XA, f64:$XB))]>;
def XSSQRTDP : XX2Form<60, 75,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xssqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set f64:$XT, (fsqrt f64:$XB))]>;
+ [(set f64:$XT, (any_fsqrt f64:$XB))]>;
def XSREDP : XX2Form<60, 90,
(outs vsfrc:$XT), (ins vsfrc:$XB),
@@ -483,20 +632,20 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XVDIVDP : XX3Form<60, 120,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivdp $XT, $XA, $XB", IIC_FPDivD,
- [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fdiv v2f64:$XA, v2f64:$XB))]>;
def XVDIVSP : XX3Form<60, 88,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvdivsp $XT, $XA, $XB", IIC_FPDivS,
- [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fdiv v4f32:$XA, v4f32:$XB))]>;
def XVSQRTDP : XX2Form<60, 203,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtdp $XT, $XB", IIC_FPSqrtD,
- [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fsqrt v2f64:$XB))]>;
def XVSQRTSP : XX2Form<60, 139,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvsqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fsqrt v4f32:$XB))]>;
def XVTDIVDP : XX3Form_1<60, 125,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
@@ -740,65 +889,65 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
def XSRDPI : XX2Form<60, 73,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fround f64:$XB))]>;
+ [(set f64:$XT, (any_fround f64:$XB))]>;
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (ffloor f64:$XB))]>;
+ [(set f64:$XT, (any_ffloor f64:$XB))]>;
def XSRDPIP : XX2Form<60, 105,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpip $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fceil f64:$XB))]>;
+ [(set f64:$XT, (any_fceil f64:$XB))]>;
def XSRDPIZ : XX2Form<60, 89,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpiz $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (ftrunc f64:$XB))]>;
+ [(set f64:$XT, (any_ftrunc f64:$XB))]>;
def XVRDPI : XX2Form<60, 201,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fround v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fround v2f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
def XVRDPIM : XX2Form<60, 249,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpim $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_ffloor v2f64:$XB))]>;
def XVRDPIP : XX2Form<60, 233,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpip $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_fceil v2f64:$XB))]>;
def XVRDPIZ : XX2Form<60, 217,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpiz $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+ [(set v2f64:$XT, (any_ftrunc v2f64:$XB))]>;
def XVRSPI : XX2Form<60, 137,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fround v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fround v4f32:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
def XVRSPIM : XX2Form<60, 185,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspim $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_ffloor v4f32:$XB))]>;
def XVRSPIP : XX2Form<60, 169,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspip $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_fceil v4f32:$XB))]>;
def XVRSPIZ : XX2Form<60, 153,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspiz $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+ [(set v4f32:$XT, (any_ftrunc v4f32:$XB))]>;
// Max/Min Instructions
let isCommutable = 1 in {
@@ -835,7 +984,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
[(set vsrc:$XT,
(int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
-} // Uses = [RM]
+ } // Uses = [RM], mayRaiseFPException
// Logical Instructions
let isCommutable = 1 in
@@ -924,433 +1073,8 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
(outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
-} // hasSideEffects
-
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let PPC970_Single = 1 in {
-
- def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
- "#SELECT_CC_VSRC",
- []>;
- def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
- "#SELECT_VSRC",
- [(set v2f64:$dst,
- (select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crrc:$cond, f8rc:$T, f8rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSFRC",
- []>;
- def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
- "#SELECT_VSFRC",
- [(set f64:$dst,
- (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crrc:$cond, f4rc:$T, f4rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSSRC",
- []>;
- def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
- "#SELECT_VSSRC",
- [(set f32:$dst,
- (select i1:$cond, f32:$T, f32:$F))]>;
-}
-} // AddedComplexity
-
-def : InstAlias<"xvmovdp $XT, $XB",
- (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-def : InstAlias<"xvmovsp $XT, $XB",
- (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
-def : InstAlias<"xxmrghd $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
-def : InstAlias<"xxmrgld $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
-
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-
-def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
- (v4i32 (XXLNOR $A, $A))>;
-def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
- (and v4i32:$B, v4i32:$C))),
- (v4i32 (XXSEL $A, $B, $C))>;
-
-let Predicates = [IsBigEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
-
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-}
-
-let Predicates = [IsLittleEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
- (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
-
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-}
-
-// Additional fnmsub patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
-
-def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
-
-def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-
-def : Pat<(v2f64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v2i64 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v1i128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v1i128 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
-
-// Loads.
-let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
- // Stores.
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-}
-
-// Load vector big endian order
-let Predicates = [IsLittleEndian, HasVSX] in {
- def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
- def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-// Permutes.
-def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-
-// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
-// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
-def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
-
-// Selects.
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
- (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
- (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
- (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
- (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-// Divides.
-def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
- (XVDIVSP $A, $B)>;
-def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
- (XVDIVDP $A, $B)>;
-
-// Reciprocal estimate
-def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
- (XVRESP $A)>;
-def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
- (XVREDP $A)>;
-
-// Recip. square root estimate
-def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
- (XVRSQRTESP $A)>;
-def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
- (XVRSQRTEDP $A)>;
-
-// Vector selection
-def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-
-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMINDP $src1, $src2))>;
-
-let Predicates = [IsLittleEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-} // IsLittleEndian
-
-let Predicates = [IsBigEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-} // IsBigEndian
-
-} // AddedComplexity
-} // HasVSX
-
-def FpMinMax {
- dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
- dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
-}
-
-let AddedComplexity = 400, Predicates = [HasVSX] in {
- // f32 Min.
- def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- // F32 Max.
- def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
-
- // f64 Min.
- def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- // f64 Max.
- def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
-}
-
-def ScalarLoads {
- dag Li8 = (i32 (extloadi8 xoaddr:$src));
- dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
- dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
- dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
- dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
-
- dag Li16 = (i32 (extloadi16 xoaddr:$src));
- dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
- dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
- dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
- dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
-
- dag Li32 = (i32 (load xoaddr:$src));
-}
-
-def DWToSPExtractConv {
- dag El0US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag El0SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
- dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
-}
-
// The following VSX instructions were introduced in Power ISA 2.07
-/* FIXME: if the operands are v2i64, these patterns will not match.
- we should define new patterns or otherwise match the same patterns
- when the elements are larger than i32.
-*/
-def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
-def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
-def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
-let Predicates = [HasP8Vector] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let Predicates = [HasVSX, HasP8Vector] in {
let isCommutable = 1 in {
def XXLEQV : XX3Form<60, 186,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -1363,9 +1087,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
v4i32:$XB)))]>;
} // isCommutable
- def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
- (XXLEQV $A, $B)>;
-
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
@@ -1379,7 +1100,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD, []>;
@@ -1404,7 +1125,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD, []>;
@@ -1422,64 +1143,42 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
- def : Pat<(f64 (extloadf32 xoaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
- (f32 (XFLOADf32 xoaddr:$src))>;
- def : Pat<(f64 (fpextend f32:$src)),
- (COPY_TO_REGCLASS $src, VSFRC)>;
-
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
- (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
- (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
// VSX Elementary Scalar FP arithmetic (SP)
+ let mayRaiseFPException = 1 in {
let isCommutable = 1 in {
def XSADDSP : XX3Form<60, 0,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsaddsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>;
def XSMULSP : XX3Form<60, 16,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsmulsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>;
} // isCommutable
+
def XSSUBSP : XX3Form<60, 8,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xssubsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>;
def XSDIVSP : XX3Form<60, 24,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
- [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+ [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
+
def XSRESP : XX2Form<60, 26,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsresp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfre f32:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1, mayRaiseFPException = 1 in
def XSRSP : XX2Form<60, 281,
(outs vssrc:$XT), (ins vsfrc:$XB),
- "xsrsp $XT, $XB", IIC_VecFP, []>;
+ "xsrsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (any_fpround f64:$XB))]>;
def XSSQRTSP : XX2Form<60, 11,
(outs vssrc:$XT), (ins vssrc:$XB),
"xssqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set f32:$XT, (fsqrt f32:$XB))]>;
+ [(set f32:$XT, (any_fsqrt f32:$XB))]>;
def XSRSQRTESP : XX2Form<60, 10,
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
@@ -1492,10 +1191,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSMADDMSP : XX3Form<60, 9,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1510,11 +1210,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSMSUBMSP : XX3Form<60, 25,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1529,11 +1230,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
f32:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSNMADDMSP : XX3Form<60, 137,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1548,11 +1250,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
"xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB,
(fneg f32:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
def XSNMSUBMSP : XX3Form<60, 153,
(outs vssrc:$XT),
(ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
@@ -1561,12 +1264,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
AltVSXFMARel;
}
- // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
- def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
-
// Single Precision Conversions (FP <-> INT)
def XSCVSXDSP : XX2Form<60, 312,
(outs vssrc:$XT), (ins vsfrc:$XB),
@@ -1582,72 +1279,16 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ } // mayRaiseFPException
- let Predicates = [IsLittleEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- }
-
- // Instructions for converting float to i64 feeding a store.
- let Predicates = [NoP9Vector] in {
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
- }
-
- // Instructions for converting float to i32 feeding a store.
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
-} // AddedComplexity = 400
-} // HasP8Vector
-
-let AddedComplexity = 400 in {
-let Predicates = [HasDirectMove] in {
+ let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[]>,
@@ -1655,7 +1296,8 @@ let Predicates = [HasDirectMove] in {
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[]>;
@@ -1663,7 +1305,8 @@ let Predicates = [HasDirectMove] in {
"mtvsrd $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i64:$rA))]>,
Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
"mtvsrd $XT, $rA", IIC_VecGeneral,
[]>,
@@ -1671,56 +1314,547 @@ let Predicates = [HasDirectMove] in {
def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsra i32:$rA))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrwa $XT, $rA", IIC_VecGeneral,
[]>;
def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
- let isCodeGenOnly = 1 in
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[]>;
-} // HasDirectMove
+ } // HasDirectMove
-let Predicates = [IsISA3_0, HasDirectMove] in {
- def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
+} // HasVSX, HasP8Vector
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
- "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in {
+def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrld $rA, $XT", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
+def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
+ "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
-} // IsISA3_0, HasDirectMove
-} // AddedComplexity = 400
+def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
+ "mfvsrld $rA, $XT", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
-// We want to parse this from asm, but we don't want to emit this as it would
-// be emitted with a VSX reg. So leave Emit = 0 here.
-def : InstAlias<"mfvrd $rA, $XT",
- (MFVRD g8rc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprd $rA, $src",
- (MFVSRD g8rc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrd $XT, $rA",
- (MTVRD vrrc:$XT, g8rc:$rA), 0>;
-def : InstAlias<"mtfprd $dst, $rA",
- (MTVSRD f8rc:$dst, g8rc:$rA)>;
-def : InstAlias<"mfvrwz $rA, $XT",
- (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprwz $rA, $src",
- (MFVSRWZ gprc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrwa $XT, $rA",
- (MTVRWA vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwa $dst, $rA",
- (MTVSRWA f8rc:$dst, gprc:$rA)>;
-def : InstAlias<"mtvrwz $XT, $rA",
- (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwz $dst, $rA",
- (MTVSRWZ f8rc:$dst, gprc:$rA)>;
+} // HasVSX, IsISA3_0, HasDirectMove
+
+let Predicates = [HasVSX, HasP9Vector] in {
+ // Quad-Precision Scalar Move Instructions:
+ // Copy Sign
+ def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
+ [(set f128:$vT,
+ (fcopysign f128:$vB, f128:$vA))]>;
+
+ // Absolute/Negative-Absolute/Negate
+ def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
+ [(set f128:$vT, (fabs f128:$vB))]>;
+ def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
+ [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
+ def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
+ [(set f128:$vT, (fneg f128:$vB))]>;
+
+ //===--------------------------------------------------------------------===//
+ // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
+
+ // Add/Divide/Multiply/Subtract
+ let mayRaiseFPException = 1 in {
+ let isCommutable = 1 in {
+ def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
+ [(set f128:$vT, (any_fadd f128:$vA, f128:$vB))]>;
+ def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
+ [(set f128:$vT, (any_fmul f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
+ [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
+ def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
+ [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
+ // Square-Root
+ def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
+ [(set f128:$vT, (any_fsqrt f128:$vB))]>;
+ // (Negative) Multiply-{Add/Subtract}
+ def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
+ [(set f128:$vT,
+ (any_fma f128:$vA, f128:$vB, f128:$vTi))]>;
+ def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
+ [(set f128:$vT,
+ (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi)))]>;
+ def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+ [(set f128:$vT,
+ (fneg (any_fma f128:$vA, f128:$vB,
+ f128:$vTi)))]>;
+ def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+ [(set f128:$vT,
+ (fneg (any_fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi))))]>;
+
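// Sign conventions of the four QP FMA forms, as encoded by the patterns
// above (acc = the tied $vTi operand):
//   xsmaddqp:  vT =  (vA * vB + acc)
//   xsmsubqp:  vT =  (vA * vB - acc)
//   xsnmaddqp: vT = -(vA * vB + acc)
//   xsnmsubqp: vT = -(vA * vB - acc)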
+ let isCommutable = 1 in {
+ def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
+ [(set f128:$vT,
+ (int_ppc_addf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
+ [(set f128:$vT,
+ (int_ppc_mulf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
+ [(set f128:$vT,
+ (int_ppc_subf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
+ [(set f128:$vT,
+ (int_ppc_divf128_round_to_odd
+ f128:$vA, f128:$vB))]>;
+ def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
+ [(set f128:$vT,
+ (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
+
+ def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
+ [(set f128:$vT,
+ (int_ppc_fmaf128_round_to_odd
+ f128:$vA,f128:$vB,f128:$vTi))]>;
+
+ def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
+ [(set f128:$vT,
+ (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
+ def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
+ [(set f128:$vT,
+ (fneg (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, f128:$vTi)))]>;
+ def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
+ [(set f128:$vT,
+ (fneg (int_ppc_fmaf128_round_to_odd
+ f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
+ } // mayRaiseFPException
+
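// Round-to-odd preserves the sticky information that round-to-nearest would
// discard, so a result that is later rounded again to a narrower format
// (e.g. when an f128 result is narrowed to f64) avoids double rounding.
// A minimal sketch of reaching these intrinsics from C (the builtin name is
// an assumption for illustration, not verified here):
//   __float128 r = __builtin_addf128_round_to_odd(a, b);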
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ // QP Compare Ordered/Unordered
+ let hasSideEffects = 1 in {
+ def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+ def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
+ // DP/QP Compare Exponents
+ def XSCMPEXPDP : XX3Form_1<60, 59,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+
+ // DP Compare ==, >=, >, !=
+ // Use vsrc for XT, because the entire register of XT is set.
+ // XT.dword[1] = 0x0000_0000_0000_0000
+ def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ }
+
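// xscmpeqdp/xscmpgedp/xscmpgtdp each produce an all-ones/all-zeros 64-bit
// mask; roughly:
//   XT.dword[0] = (XA <op> XB) ? 0xFFFF_FFFF_FFFF_FFFF : 0
//   XT.dword[1] = 0    // whole register written, hence vsrc for XT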
+ //===--------------------------------------------------------------------===//
+ // Quad-Precision Floating-Point Conversion Instructions:
+
+ let mayRaiseFPException = 1 in {
+ // Convert DP -> QP
+ def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
+ [(set f128:$vT, (any_fpextend f64:$vB))]>;
+
+ // Round & Convert QP -> DP (dword[1] is set to zero)
+ def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
+ def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
+ [(set f64:$vT,
+ (int_ppc_truncf128_round_to_odd
+ f128:$vB))]>;
+ }
+
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
+ let hasSideEffects = 1 in {
+ def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
+ def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
+ def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
+ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
+ }
+
+ // Convert (Un)Signed DWord -> QP.
+ def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
+ def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
+
+ // (Round &) Convert DP <-> HP
+  // Note! xscvdphp's src and dest registers both use the left 64 bits, so we
+  // use vsfrc for both. xscvhpdp's src only uses the left 16 bits,
+  // but we still use vsfrc for it.
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+ def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+ }
+
+ // Vector HP -> SP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
+ def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
+ [(set v4f32:$XT,
+ (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
+
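// The int_ppc_vsx_xvcvhpsp intrinsic is matched by a standalone pattern so
// the v8i16 operand can first be copied from VRRC into VSRC, e.g.:
//   def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
//             (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;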
+ let mayRaiseFPException = 1 in {
+ // Round to Quad-Precision Integer [with Inexact]
+ def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
+ def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
+ }
+
+ // Round Quad-Precision to Double-Extended Precision (fp80)
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+
+ //===--------------------------------------------------------------------===//
+ // Insert/Extract Instructions
+
+ // Insert Exponent DP/QP
+ // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+    // vB NOTE: only vB.dword[0] is used, which is why we don't use the
+    // X_VT5_VA5_VB5 form.
+ def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
+ "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ }
+
+ // Extract Exponent/Significand DP/QP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
+ def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
+
+ def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
+ def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ }
+
+ // Vector Insert Word
+ // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
+ def XXINSERTW :
+ XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
+ (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
+ "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
+ [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
+ imm32SExt16:$UIM))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ // Vector Extract Unsigned Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
+ (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
+ "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
+
+ // Vector Insert Exponent DP/SP
+ def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
+ IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
+ def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
+ IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
+
+ // Vector Extract Exponent/Significand DP/SP
+ def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
+ def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
+ def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
+ def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
+
+ // Test Data Class SP/DP/QP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
+ "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
+ }
+
+ // Vector Test Data Class SP/DP
+ def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
+ (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
+ "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
+ def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
+ (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
+ "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
+
+ // Maximum/Minimum Type-C/Type-J DP
+ def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
+ def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+ IIC_VecFP,
+ [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
+
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ }
+
+ // Vector Byte-Reverse H/W/D/Q Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
+ def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
+ [(set v4i32:$XT, (bswap v4i32:$XB))]>;
+ def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
+ [(set v2i64:$XT, (bswap v2i64:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
+
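// xxbr{h,w,d,q} reverses the byte order within each halfword/word/
// doubleword/quadword element, which is why xxbrw and xxbrd match the
// per-element bswap nodes above; for a single word under xxbrw:
//   [b0 b1 b2 b3] -> [b3 b2 b1 b0]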
+ // Vector Permute
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ }
+
+ // Vector Splat Immediate Byte
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
+ "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
+
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
+ let mayLoad = 1, mayStore = 0 in {
+ // Load Vector
+ def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
+ "lxv $XT, $src", IIC_LdStLFD, []>;
+ // Load DWord
+ def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
+ "lxsd $vD, $src", IIC_LdStLFD, []>;
+ // Load SP from src, convert it to DP, and place in dword[0]
+ def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
+ "lxssp $vD, $src", IIC_LdStLFD, []>;
+
+ // Load as Integer Byte/Halfword & Zero Indexed
+ def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
+ def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
+
+ // Load Vector Halfword*8/Byte*16 Indexed
+ def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
+ def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
+
+ // Load Vector Indexed
+ def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
+ [(set v2f64:$XT, (load xaddrX16:$src))]>;
+ // Load Vector (Left-justified) with Length
+ def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvl $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
+ def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvll $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
+
+ // Load Vector Word & Splat Indexed
+ def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
+ } // mayLoad
+
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
+ let mayStore = 1, mayLoad = 0 in {
+ // Store Vector
+ def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
+ "stxv $XT, $dst", IIC_LdStSTFD, []>;
+ // Store DWord
+ def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
+ "stxsd $vS, $dst", IIC_LdStSTFD, []>;
+ // Convert DP of dword[0] to SP, and Store to dst
+ def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
+ "stxssp $vS, $dst", IIC_LdStSTFD, []>;
+
+ // Store as Integer Byte/Halfword Indexed
+ def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
+ def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
+ let isCodeGenOnly = 1 in {
+ def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
+ def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
+ }
+
+ // Store Vector Halfword*8/Byte*16 Indexed
+ def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
+ def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
+
+ // Store Vector Indexed
+ def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
+ [(store v2f64:$XT, xaddrX16:$dst)]>;
+
+ // Store Vector (Left-justified) with Length
+ def STXVL : XX1Form_memOp<31, 397, (outs),
+ (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvl $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
+ i64:$rB)]>;
+ def STXVLL : XX1Form_memOp<31, 429, (outs),
+ (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvll $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
+ i64:$rB)]>;
+ } // mayStore
+
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load iaddrX4:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load iaddrX4:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, iaddrX4:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, iaddrX4:$dst)]>;
+
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
+  }
+ } // HasP9Vector
+} // hasSideEffects = 0
+
+let PPC970_Single = 1, AddedComplexity = 400 in {
+
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
+}
+}
+
+//----------------------------- DAG Definitions ------------------------------//
+def FpMinMax {
+ dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+ dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+}
+
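// FpMinMax reuses the double-precision xsmindp/xsmaxdp for f32 operands:
// singles are kept in VSX registers in double format, so VSSRC<->VSFRC
// regclass copies are all that is needed. A hypothetical use site (the
// selector node name here is illustrative only):
//   def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), (f32 FpMinMax.F32Min)>;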
+def ScalarLoads {
+ dag Li8 = (i32 (extloadi8 xoaddr:$src));
+ dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
+ dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
+ dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+ dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+ dag Li16 = (i32 (extloadi16 xoaddr:$src));
+ dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
+ dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+ dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
+ dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+ dag Li32 = (i32 (load xoaddr:$src));
+}
+
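// ScalarLoads names the extending-load fragments reused by the int-to-fp
// conversion patterns. SELi8/SELi8i64 are composed from an any-extending
// load plus sext_inreg because PPC has no sign-extending byte load (lbz
// zero-extends), e.g.:
//   SELi8 == (i32 (sext_inreg (extloadi8 xoaddr:$src), i8))
// which typically selects to lbzx followed by extsb.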
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
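// DWToSPExtractConv names the extract-doubleword-then-convert-to-f32 dags:
// El<k><S|U>S<n> takes element k of v2i64 $S<n>, moves it into a VSR, and
// converts it signed (PPCfcfids) or unsigned (PPCfcfidus); BVS/BVU then
// rebuild a full v4f32 from two v2i64 sources. The P8 patterns earlier in
// this diff select these to xscvsxdsp/xscvuxdsp, using xxpermdi to line up
// the other doubleword.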
+def WToDPExtractConv {
+ dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
+ dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
+ dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
+ dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
+ dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
+ dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
+ dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
+ dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
+ dag BV02S = (v2f64 (build_vector El0S, El2S));
+ dag BV13S = (v2f64 (build_vector El1S, El3S));
+ dag BV02U = (v2f64 (build_vector El0U, El2U));
+ dag BV13U = (v2f64 (build_vector El1U, El3U));
+}
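// WToDPExtractConv is the word -> f64 analogue: each word of v4i32 $A is
// moved to a VSR (PPCmtvsra sign-extends, PPCmtvsrz zero-extends) and
// converted with PPCfcfid/PPCfcfidu. BV02*/BV13* pair elements {0,2} and
// {1,3} because a v2f64 can hold only half of the four results.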
/* Direct moves of various widths from GPRs into VSRs. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -2038,1789 +2172,11 @@ def VectorExtractions {
dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
}
-def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">;
-let AddedComplexity = 400 in {
-// v4f32 scalar <-> vector conversions (BE)
-let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XSCVDPSPN $A))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
-} // IsBigEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsBigEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
-
-let Predicates = [IsBigEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (BE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
-
- // v2i64 scalar <-> vector conversions (BE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
-} // IsBigEndian, HasDirectMove
-
-let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_WORD)>;
-} // IsBigEndian, HasDirectMove, NoP9Altivec
-
-// v4f32 scalar <-> vector conversions (LE)
-let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
-} // IsLittleEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsLittleEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
-
-def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
-// Variable index unsigned vector_extract on Power9
-let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBRX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHRX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHRX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHRX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHRX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHRX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHRX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHRX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHRX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWRX (LI8 0), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (VEXTUWRX (LI8 4), $S)>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWRX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWRX (LI8 0), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (VEXTUWRX (LI8 4), $S))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWRX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
-  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBLX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHLX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHLX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHLX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHLX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHLX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHLX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHLX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHLX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWLX (LI8 0), $S)>;
-
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (VEXTUWLX (LI8 8), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWLX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWLX (LI8 0), $S))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (VEXTUWLX (LI8 8), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWLX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
-  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [IsLittleEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (LE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 MovesToVSR.LE_WORD_0)>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 MovesToVSR.LE_DWORD_0)>;
- // v2i64 scalar <-> vector conversions (LE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
-} // IsLittleEndian, HasDirectMove
-
-let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_WORD)>;
-} // IsLittleEndian, HasDirectMove, NoP9Altivec
-
-let Predicates = [HasDirectMove, HasVSX] in {
-// bitconvert f32 -> i32
-// (convert to 32-bit fp single, shift right 1 word, move to GPR)
-def : Pat<(i32 (bitconvert f32:$S)),
- (i32 (MFVSRWZ (EXTRACT_SUBREG
- (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
- sub_64)))>;
-// bitconvert i32 -> f32
-// (move to FPR, shift left 1 word, convert to 64-bit fp single)
-def : Pat<(f32 (bitconvert i32:$A)),
- (f32 (XSCVSPDPN
- (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
-
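// The word shifts in these bitconvert patterns exist because a single is
// kept in the VSR in double format: XSCVDPSPN produces the 32-bit single
// image in the register, and XXSLDWI rotates it into the word that MFVSRWZ
// reads. In instruction terms:
//   f32 -> i32: xscvdpspn ; xxsldwi v,v,3 ; mfvsrwz
//   i32 -> f32: MovesToVSR.LE_WORD_1 ; xxsldwi v,v,1 ; xscvspdpn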
-// bitconvert f64 -> i64
-// (move to GPR, nothing else needed)
-def : Pat<(i64 (bitconvert f64:$S)),
- (i64 (MFVSRD $S))>;
-
-// bitconvert i64 -> f64
-// (move to FPR, nothing else needed)
-def : Pat<(f64 (bitconvert i64:$S)),
- (f64 (MTVSRD $S))>;
-
-// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-}
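// lrint/llrint use fctid alone (convert to doubleword in the current
// rounding mode), while lround/llround first round with xsrdpi (round to
// nearest, ties away from zero), matching lround's required semantics:
//   lrint(x)  -> mfvsrd(fctid(x))
//   lround(x) -> mfvsrd(fctid(xsrdpi(x)))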
-
-let Predicates = [HasVSX] in {
-// Rounding for single precision.
-def : Pat<(f32 (fround f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPI
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fnearbyint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ffloor f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIM
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fceil f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIP
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ftrunc f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIZ
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-}
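// These f32 rounding patterns reuse the DP round-to-integral instructions:
// the f32 input is exact as a double, and a round-to-integral result of an
// f32-ranged value is always representable as f32 (values >= 2^23 are
// already integral; smaller ones round to integers below 2^24), so only
// VSSRC<->VSFRC regclass copies are needed, e.g.:
//   fround($S) -> COPY_TO_REGCLASS(XSRDPI(COPY_TO_REGCLASS($S, VSFRC)), VSSRC)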
-
-// Materialize a zero-vector of long long
-def : Pat<(v2i64 immAllZerosV),
- (v2i64 (XXLXORz))>;
-}
-
def AlignValues {
dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
}
-// The following VSX instructions were introduced in Power ISA 3.0
-def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
-let AddedComplexity = 400, Predicates = [HasP9Vector] in {
-
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
-
-  // [PO VRT XO VRB XO /], but only the left 64 bits (or fewer) of VRB are used,
-  // so we use a different operand class for VRB
- class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- RegisterOperand vbtype, list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
-
- // [PO T XO B XO BX /]
- class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- list<dag> pattern>
- : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
- !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T XO B XO BX TX]
- class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
- !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T A B XO AX BX TX], src and dest register use different operand class
- class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
- RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
- InstrItinClass itin, list<dag> pattern>
- : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
- !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Scalar Move Instructions:
-
- // Copy Sign
- def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
- [(set f128:$vT,
- (fcopysign f128:$vB, f128:$vA))]>;
-
- // Absolute/Negative-Absolute/Negate
- def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
- [(set f128:$vT, (fabs f128:$vB))]>;
- def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
- [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
- def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
- [(set f128:$vT, (fneg f128:$vB))]>;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
-
- // Add/Divide/Multiply/Subtract
- let isCommutable = 1 in {
- def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
- [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
- def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
- [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
- }
- def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
- [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
- def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
- [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
- // Square-Root
- def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (fsqrt f128:$vB))]>;
- // (Negative) Multiply-{Add/Subtract}
- def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- f128:$vTi))]>;
- def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
- def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
- def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
-
- let isCommutable = 1 in {
- def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
- [(set f128:$vT,
- (int_ppc_addf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
- [(set f128:$vT,
- (int_ppc_mulf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- }
- def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
- [(set f128:$vT,
- (int_ppc_subf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
- [(set f128:$vT,
- (int_ppc_divf128_round_to_odd
- f128:$vA, f128:$vB))]>;
- def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
- [(set f128:$vT,
- (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
-
-
- def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
- [(set f128:$vT,
- (int_ppc_fmaf128_round_to_odd
- f128:$vA,f128:$vB,f128:$vTi))]>;
-
- def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
- [(set f128:$vT,
- (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
- def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
- [(set f128:$vT,
- (fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, f128:$vTi)))]>;
- def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
- [(set f128:$vT,
- (fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
-
- // Additional fnmsub patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
- def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
-
- //===--------------------------------------------------------------------===//
- // Quad/Double-Precision Compare Instructions:
-
- // [PO BF // VRA VRB XO /]
- class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
- !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
- let Pattern = pattern;
- }
-
- // QP Compare Ordered/Unordered
- def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
- def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
-
- // DP/QP Compare Exponents
- def XSCMPEXPDP : XX3Form_1<60, 59,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
- def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
-
- // DP Compare ==, >=, >, !=
- // Use vsrc for XT because the entire XT register is set:
- // XT.dword[1] = 0x0000_0000_0000_0000
- def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
-
- //===--------------------------------------------------------------------===//
- // Quad-Precision Floating-Point Conversion Instructions:
-
- // Convert DP -> QP
- def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
- [(set f128:$vT, (fpextend f64:$vB))]>;
-
- // Round & Convert QP -> DP (dword[1] is set to zero)
- def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
- def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
- [(set f64:$vT,
- (int_ppc_truncf128_round_to_odd
- f128:$vB))]>;
-
- // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
- def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
- def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
- def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
- def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
-
- // Convert (Un)Signed DWord -> QP.
- def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
- def : Pat<(f128 (sint_to_fp i64:$src)),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP $src))>;
- def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
-
- def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
- def : Pat<(f128 (uint_to_fp i64:$src)),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP $src))>;
-
- // Convert (Un)Signed Word -> QP.
- def : Pat<(f128 (sint_to_fp i32:$src)),
- (f128 (XSCVSDQP (MTVSRWA $src)))>;
- def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp i32:$src)),
- (f128 (XSCVUDQP (MTVSRWZ $src)))>;
- def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
-
- //===--------------------------------------------------------------------===//
- // Round to Floating-Point Integer Instructions
-
- // (Round &) Convert DP <-> HP
- // Note: xscvdphp's source and destination registers both use the left
- // 64 bits, so we use vsfrc for both. xscvhpdp's source uses only the left
- // 16 bits, but we still use vsfrc for it.
- def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
- def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
-
- // Vector HP -> SP
- def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
- def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
- [(set v4f32:$XT,
- (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
-
- // Pattern for matching the Vector HP -> Vector SP intrinsic. Defined as a
- // separate pattern so that it can convert the input register class from
- // VRRC (v8i16) to VSRC.
- def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
- (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
-
- class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
- list<dag> pattern>
- : Z23Form_8<opcode, xo,
- (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
- !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
- let RC = ex;
- }
-
- // Round to Quad-Precision Integer [with Inexact]
- def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
- def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
-
- // Use current rounding mode
- def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
- // Round to nearest, ties away from zero
- def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
- // Round towards Zero
- def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
- // Round towards +Inf
- def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
- // Round towards -Inf
- def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
-
- // Use current rounding mode, [with Inexact]
- def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
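-
- // As a sketch of how these select (hypothetical C, assuming a Power9 target
- // with __float128 support and glibc's ceilf128):
- // __float128 up(__float128 x) { return ceilf128(x); }
- // lowers to fceil and selects "xsrqpi 1, vT, vB, 2" via the pattern above.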
-
- // Round Quad-Precision to Double-Extended Precision (fp80)
- def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
-
- //===--------------------------------------------------------------------===//
- // Insert/Extract Instructions
-
- // Insert Exponent DP/QP
- // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
- def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
- // vB NOTE: only vB.dword[0] is used, which is why we don't use the
- // X_VT5_VA5_VB5 form
- def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
- "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
-
- def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
- (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
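- // MTVSRD moves the GPR exponent into a VSR so that XSIEXPQP can read it
- // from vB.dword[0].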
-
- // Extract Exponent/Significand DP/QP
- def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
- def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
-
- def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
- def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
-
- def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
- (i64 (MFVSRD (EXTRACT_SUBREG
- (v2i64 (XSXEXPQP $vA)), sub_64)))>;
-
- // Vector Insert Word
- // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
- def XXINSERTW :
- XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
- (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
- "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
- [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
- imm32SExt16:$UIM))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
-
- // Vector Extract Unsigned Word
- def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
- (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
- "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
-
- // Vector Insert Exponent DP/SP
- def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
- IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
- def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
- IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
-
- // Vector Extract Exponent/Significand DP/SP
- def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
- def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
- def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
- def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
-
- let AddedComplexity = 400, Predicates = [HasP9Vector] in {
- // Extra patterns expanding to vector Extract Word/Insert Word
- def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
- (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
- def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
- (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
- } // AddedComplexity = 400, HasP9Vector
-
- //===--------------------------------------------------------------------===//
-
- // Test Data Class SP/DP/QP
- def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
- (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
- "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
-
- // Vector Test Data Class SP/DP
- def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
- (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
- [(set v4i32: $XT,
- (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
- def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
- (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
- [(set v2i64: $XT,
- (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
-
- //===--------------------------------------------------------------------===//
-
- // Maximum/Minimum Type-C/Type-J DP
- def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
- IIC_VecFP,
- [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
- def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
- def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
- IIC_VecFP,
- [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
- def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
-
- //===--------------------------------------------------------------------===//
-
- // Vector Byte-Reverse H/W/D/Q Word
- def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
- def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
- [(set v4i32:$XT, (bswap v4i32:$XB))]>;
- def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
- [(set v2i64:$XT, (bswap v2i64:$XB))]>;
- def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
-
- // Vector Reverse
- def : Pat<(v8i16 (bswap v8i16:$A)),
- (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
- def : Pat<(v1i128 (bswap v1i128:$A)),
- (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
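- // The COPY_TO_REGCLASS nodes move the Altivec-class (VRRC) values into the
- // VSX class (VSRC) that xxbrh/xxbrq operate on, then copy the result back.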
-
- // Vector Permute
- def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
- def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
-
- // Vector Splat Immediate Byte
- def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
- "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
-
- //===--------------------------------------------------------------------===//
- // Vector/Scalar Load/Store Instructions
-
- // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
- // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
- // Load Vector
- def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>;
- // Load DWord
- def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
- "lxsd $vD, $src", IIC_LdStLFD, []>;
- // Load SP from src, convert it to DP, and place in dword[0]
- def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
- "lxssp $vD, $src", IIC_LdStLFD, []>;
-
- // [PO T RA RB XO TX] is almost identical to [PO S RA RB XO SX], but has
- // different "out" and "in" dags
- class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
-
- // Load as Integer Byte/Halfword & Zero Indexed
- def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
- [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
- def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
- [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
-
- // Load Vector Halfword*8/Byte*16 Indexed
- def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
- def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
-
- // Load Vector Indexed
- def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xaddrX16:$src))]>;
- // Load Vector (Left-justified) with Length
- def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvl $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
- def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvll $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
-
- // Load Vector Word & Splat Indexed
- def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
- } // mayLoad
-
- // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
- // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
- // Store Vector
- def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>;
- // Store DWord
- def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxsd $vS, $dst", IIC_LdStSTFD, []>;
- // Convert DP of dword[0] to SP, and Store to dst
- def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxssp $vS, $dst", IIC_LdStSTFD, []>;
-
- // [PO S RA RB XO SX]
- class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
-
- // Store as Integer Byte/Halfword Indexed
- def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
- [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
- def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
- [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
- let isCodeGenOnly = 1 in {
- def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
- def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
- }
-
- // Store Vector Halfword*8/Byte*16 Indexed
- def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
- def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
-
- // Store Vector Indexed
- def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xaddrX16:$dst)]>;
-
- // Store Vector (Left-justified) with Length
- def STXVL : XX1Form_memOp<31, 397, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvl $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
- i64:$rB)]>;
- def STXVLL : XX1Form_memOp<31, 429, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvll $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
- i64:$rB)]>;
- } // mayStore
-
- let Predicates = [IsLittleEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- }
-
- // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
- // of f64
- def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
- def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
-
- // Patterns for which instructions from ISA 3.0 are a better match
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
-
- def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
- (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
-
- def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
- (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- } // IsBigEndian, HasP9Vector
-
- // D-Form Load/Store
- def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
- (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
-
- def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
- (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
-
- def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
- def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
- (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
-
- let AddedComplexity = 400 in {
- // LIWAX - This instruction is used for sign-extending i32 -> i64.
- // LIWZX - This instruction will be emitted for i32 and f32 loads, and when
- // zero-extending i32 to i64 (zext i32 -> i64).
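- // For example (hypothetical C, assuming a VSX subtarget):
- // long f(const int *p) { return *p; } // sign-extend: LIWAX
- // unsigned long g(const unsigned *p) { return *p; } // zero-extend: LIWZX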
- let Predicates = [IsLittleEndian] in {
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
-
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
-
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
-
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
-
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
- }
-
- }
-
- // Build vectors from i8 loads
- def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
- (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
- (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
- (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
- (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
- (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
- (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
-
- // Build vectors from i16 loads
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
- (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
- (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
- (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
- (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
- (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
- // Vector sign extensions
- def : Pat<(f64 (PPCVexts f64:$A, 1)),
- (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
- def : Pat<(f64 (PPCVexts f64:$A, 2)),
- (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
-
- def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load iaddrX4:$src))]>;
- def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load iaddrX4:$src))]>;
- def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, iaddrX4:$dst)]>;
- def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, iaddrX4:$dst)]>;
-
- def : Pat<(f64 (extloadf32 iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
- (f32 (DFLOADf32 iaddrX4:$src))>;
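- // The extend/round pair folds away here: an f32 value loaded, extended to
- // f64, and immediately rounded back to f32 is just the f32 load itself.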
-
- def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
- def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
-
- let AddedComplexity = 400 in {
- // The following pseudoinstructions are used to ensure that all 64 VSX
- // registers can be utilized.
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsBigEndian, HasP9Vector
- }
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
-
- // (Un)Signed Word vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- foreach Idx = [0,2,3] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
- }
- foreach Idx = 0-3 in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
- }
-
- // (Un)Signed HWord vector extract -> QP
- foreach Idx = 0-7 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, Idx), i16)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
- sub_64)))>;
- // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
- }
-
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = 0-15 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
- i8)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
- }
-
- // Unsigned int in VSX register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- // (Un)Signed Word vector extract -> QP
- foreach Idx = [[0,3],[1,2],[3,0]] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
- sub_64)))>;
- }
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
-
- foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
- }
-
- // (Un)Signed HWord vector extract -> QP
- // The nested foreach list identifies each vector element and its
- // corresponding register byte location.
- foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, !head(Idx)), i16)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (VEXTSH2D
- (VEXTRACTUH !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, !head(Idx))),
- 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
- }
-
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
- [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v16i8:$src, !head(Idx)), i8)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, !head(Idx))),
- 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG
- (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
- }
-
- // Unsigned int in VSX register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
- } // IsLittleEndian, HasP9Vector
-
- // Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
-
- // Convert Unsigned HWord in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
- (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
-
- // Convert Unsigned Byte in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
- (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
-
- // Truncate & Convert QP -> (Un)Signed (D)Word.
- def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
- def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
- def : Pat<(i32 (fp_to_sint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
- def : Pat<(i32 (fp_to_uint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
-
- // Instructions for store(fptosi).
- // The 8-byte version is repeated here due to the availability of D-Form STXSD.
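- // For example (hypothetical C):
- // void f(long *p, __float128 x) { *p = (long)x; }
- // stores the converted value directly from the VSR, avoiding a round trip
- // through a GPR.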
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
-
- // Instructions for store(fptoui).
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- // Round & Convert QP -> DP/SP
- def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
- def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
-
- // Convert SP -> QP
- def : Pat<(f128 (fpextend f32:$src)),
- (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
- def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
-
-} // end HasP9Vector, AddedComplexity
-
-let AddedComplexity = 400 in {
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
-}
-
-let Predicates = [HasP9Vector], hasSideEffects = 0 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
- }
-}
// Integer extend helper dags 32 -> 64
def AnyExts {
dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
@@ -3830,10 +2186,10 @@ def AnyExts {
}
def DblToFlt {
- dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
- dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
- dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
- dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
+ dag A0 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 0))));
+ dag A1 = (f32 (any_fpround (f64 (extractelt v2f64:$A, 1))));
+ dag B0 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 0))));
+ dag B1 = (f32 (any_fpround (f64 (extractelt v2f64:$B, 1))));
}
def ExtDbl {
@@ -4024,397 +2380,2261 @@ def MrgWords {
dag CVCAU = (v4i32 (XVCVDPUXWS CA));
}
-// Patterns for BUILD_VECTOR nodes.
+//---------------------------- Anonymous Patterns ----------------------------//
+// Predicate combinations are kept in roughly chronological order in terms of
+// instruction availability in the architecture. For example, VSX came in with
+// ISA 2.06 (Power7). There have since been additions in ISA 2.07 (Power8) and
+// ISA 3.0 (Power9). However, the granularity of features on later subtargets
+// is finer for various reasons. For example, we have Power8Vector,
+// Power8Altivec, and DirectMove, all of which came in with ISA 2.07. The
+// situation is similar for ISA 3.0, with Power9Vector, Power9Altivec, and
+// IsISA3_0. Then there are orthogonal predicates, such as endianness, for
+// which the order was arbitrarily chosen to be Big, then Little.
+//
+// Predicate combinations available:
+// [HasVSX]
+// [HasVSX, IsBigEndian]
+// [HasVSX, IsLittleEndian]
+// [HasVSX, NoP9Vector]
+// [HasVSX, HasOnlySwappingMemOps]
+// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
+// [HasVSX, HasP8Vector]
+// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsLittleEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasDirectMove]
+// [HasVSX, HasDirectMove, IsBigEndian]
+// [HasVSX, HasDirectMove, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Vector]
+// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Altivec]
+// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsLittleEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
+
let AddedComplexity = 400 in {
+// Valid for any VSX subtarget, regardless of endianness.
+let Predicates = [HasVSX] in {
+def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
+ (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+ (and v4i32:$B, v4i32:$C))),
+ (v4i32 (XXSEL $A, $B, $C))>;
- let Predicates = [HasVSX] in {
- // Build vectors of floating point converted to i32.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
- DblToInt.A, DblToInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
- DblToUInt.A, DblToUInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
- def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
- def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
- (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
- def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
- (v2f64 (LXVDSX xoaddr:$A))>;
- def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
- (v2i64 (LXVDSX xoaddr:$A))>;
-
- // Build vectors of floating point converted to i64.
- def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
- (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
- def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
- (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
- }
+// Additional fnmsub patterns for the PPC-specific ISD opcode
+def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f64:$A, f64:$B, f64:$C)),
+ (XSMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f64:$A, f64:$B, (fneg f64:$C)),
+ (XSNMADDADP $C, $A, $B)>;
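+// With PPCfnmsub(a, b, c) = -(a*b - c), these folds follow algebraically:
+// fneg(PPCfnmsub(a, b, c)) = a*b - c (msub), and
+// PPCfnmsub(a, b, fneg(c)) = -(a*b + c) (nmadd). The same reasoning applies
+// to the vector forms below.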
- let Predicates = [HasVSX, NoP9Vector] in {
- // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- }
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v2f64:$A, v2f64:$B, v2f64:$C)),
+ (XVMSUBADP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v2f64:$A, v2f64:$B, (fneg v2f64:$C)),
+ (XVNMADDADP $C, $A, $B)>;
- let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
- (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
- (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on a BE system are in order <0, 1, 2, 3>.
- // The store instructions store the second word from the left.
- // So to align element zero, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 2 and 3.
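- // Worked example: XXSLDWI $A, $A, 3 rotates <0,1,2,3> to <3,0,1,2>, placing
- // element 0 in word position 1, the word that STIWX stores.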
- foreach Idx = [ [0,3], [2,1], [3,2] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
- }
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C)),
+ (XVMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
+ (XVNMADDASP $C, $A, $B)>;
- let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- }
-
- // Big endian, available on all targets with VSX
- let Predicates = [IsBigEndian, HasVSX] in {
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $A, VSRC),
- (COPY_TO_REGCLASS $B, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where regpressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
- (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
- }
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
- let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
- (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
- (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on a LE system are in order <3, 2, 1, 0>.
- // The store instructions store the second word from the left.
- // So to align element 3, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 0 and 1.
- foreach Idx = [ [0,2], [1,1], [3,3] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
- }
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- }
-
- let Predicates = [IsLittleEndian, HasVSX] in {
- // Little endian, available on all targets with VSX
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $B, VSRC),
- (COPY_TO_REGCLASS $A, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where regpressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
- (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
- def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
- }
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
- let Predicates = [HasDirectMove] in {
- // Endianness-neutral constant splat on P8 and newer targets. The reason
- // for this pattern is that on targets with direct moves, we don't expand
- // BUILD_VECTOR nodes for v4i32.
- def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
- immSExt5NonZero:$A, immSExt5NonZero:$A)),
- (v4i32 (VSPLTISW imm:$A))>;
- }
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
- // Big endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
- // Little endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP8Vector] in {
- def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
- (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
- (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
- (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
- (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- }
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
- let Predicates = [HasP9Vector] in {
- // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
- (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
- (v4f32 (LXVWSX xoaddr:$A))>;
- def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
- (v4i32 (LXVWSX xoaddr:$A))>;
- }
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
- let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 1)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Big endian.
- def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
- (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
- }
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
- let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 0)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Little endian.
- def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
- (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
- }
- // P9 Altivec instructions that can be used to build vectors.
- // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
- // with complexities of existing build vector patterns in this file.
- let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
- HWordToWord.LE_A2, HWordToWord.LE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
- ByteToWord.LE_A2, ByteToWord.LE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
- let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
- HWordToWord.BE_A2, HWordToWord.BE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
- ByteToWord.BE_A2, ByteToWord.BE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
- let Predicates = [HasP9Altivec] in {
- def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
- (v2i64 (VEXTSB2D $A))>;
- def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
- (v2i64 (VEXTSH2D $A))>;
- def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
- (v2i64 (VEXTSW2D $A))>;
- def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
- (v4i32 (VEXTSB2W $A))>;
- def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
- (v4i32 (VEXTSH2W $A))>;
- }
+// PPCvecshl XT, XA, XA, 2 can be selected as either XXSLDWI XT,XA,XA,2 or
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)),
+ (XXPERMDI $src, $src, 2)>;
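
The lane arithmetic behind that equivalence can be sketched in C (illustrative only): sliding the concatenation $src||$src left by two words is the same permutation as swapping the two doublewords.

#include <stdio.h>

int main(void) {
  unsigned v[4] = {10, 11, 12, 13};          /* word lanes of $src  */
  unsigned sldwi2[4], permdi2[4];
  for (int i = 0; i < 4; ++i)
    sldwi2[i] = v[(i + 2) % 4];              /* XXSLDWI $src,$src,2 */
  permdi2[0] = v[2]; permdi2[1] = v[3];      /* XXPERMDI $src,$src,2 */
  permdi2[2] = v[0]; permdi2[3] = v[1];      /* (doubleword swap)   */
  for (int i = 0; i < 4; ++i)                /* lanes match exactly */
    printf("%u %u\n", sldwi2[i], permdi2[i]);
  return 0;
}
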
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
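
Since $lhs and $rhs are CR bits (i1), every condition code collapses to two-input bit logic, which is why each pattern pairs with a single CR-logical instruction. A small C truth-table sketch (illustrative only; signed i1 true is -1, so SETLT becomes lhs AND NOT rhs, i.e. CRANDC):

#include <stdio.h>

int main(void) {
  for (int l = 0; l < 2; ++l)
    for (int r = 0; r < 2; ++r) {
      int setlt  = -l < -r;        /* signed i1 SETLT   */
      int crandc = l & ~r & 1;     /* CRANDC $lhs, $rhs */
      int setle  = -l <= -r;       /* signed i1 SETLE   */
      int crorc  = (l | ~r) & 1;   /* CRORC $lhs, $rhs  */
      int seteq  = l == r;         /* SETEQ             */
      int creqv  = ~(l ^ r) & 1;   /* CREQV $lhs, $rhs  */
      printf("%d %d: %d=%d %d=%d %d=%d\n",
             l, r, setlt, crandc, setle, crorc, seteq, creqv);
    }
  return 0;
}
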
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Reciprocal square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
+def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
+// f32 abs
+def : Pat<(f32 (fabs f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSABSDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 nabs
+def : Pat<(f32 (fneg (fabs f32:$S))),
+ (f32 (COPY_TO_REGCLASS (XSNABSDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// f32 Min.
+def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+// f32 Max.
+def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+
+// f64 Min.
+def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+// f64 Max.
+def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Rounding for single precision.
+def : Pat<(f32 (any_fround f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPI
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_ffloor f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIM
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_fceil f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_ftrunc f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIZ
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (any_frint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+
+// Rounding for double precision.
+def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+ (v2i64 (XXLXORz))>;
+
+// Build vectors of floating point converted to i32.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
+ DblToInt.A, DblToInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
+ DblToUInt.A, DblToUInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+defm : ScalToVecWPermute<
+ v4i32, FltToIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v4i32, FltToUIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
+ (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+ (v2f64 (LXVDSX xoaddr:$A))>;
+def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+ (v2i64 (LXVDSX xoaddr:$A))>;
+
+// Build vectors of floating point converted to i64.
+def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
+def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
+defm : ScalToVecWPermute<
+ v2i64, DblToLongLoad.A,
+ (XVCVDPSXDS (LXVDSX xoaddr:$A)), (XVCVDPSXDS (LXVDSX xoaddr:$A))>;
+defm : ScalToVecWPermute<
+ v2i64, DblToULongLoad.A,
+ (XVCVDPUXDS (LXVDSX xoaddr:$A)), (XVCVDPUXDS (LXVDSX xoaddr:$A))>;
+} // HasVSX
+
+// Any big endian VSX subtarget.
+let Predicates = [HasVSX, IsBigEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for two reasons:
+// 1. This is likely to occur in unrolled loops where register pressure is
+//    high, so we want to use the latter as it has access to all 64 VSX
+//    registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to
+// each of the loads which would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
+ (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
+ (XXPERMDI $A, $B, 3), 1)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+} // HasVSX, IsBigEndian
+
+// Any little endian VSX subtarget.
+let Predicates = [HasVSX, IsLittleEndian] in {
+defm : ScalToVecWPermute<v2f64, (f64 f64:$A),
+ (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0),
+ (SUBREG_TO_REG (i64 1), $A, sub_64)>;
+
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+
+def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+// Little endian, available on all targets with VSX
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $A, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for two reasons:
+// 1. This is likely to occur in unrolled loops where register pressure is
+//    high, so we want to use the latter as it has access to all 64 VSX
+//    registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to
+// each of the loads which would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
+ (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
+def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
+ (XXPERMDI $B, $A, 3), 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP $A))>;
+} // HasVSX, IsLittleEndian
+
+// Any pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector] in {
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
+
+// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
+defm : ScalToVecWPermute<
+ v4i32, DblToIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v4i32, DblToUIntLoad.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
+ (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToLongLoad.A,
+ (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
+ (SUBREG_TO_REG (i64 1), (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
+ VSFRC)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToULongLoad.A,
+ (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
+ (SUBREG_TO_REG (i64 1), (XSCVDPUXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A),
+ VSFRC)), sub_64)>;
+} // HasVSX, NoP9Vector
+
+// Any VSX subtarget that only has loads and stores that load in big endian
+// order regardless of endianness. In practice, these are the pre-Power9
+// subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+ // Stores.
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps
+
+// Big endian VSX subtarget that only has loads and stores that always load
+// in big endian order. In practice, big endian pre-Power9 subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps, IsBigEndian
+
+// Any Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector] in {
+def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
+ (f32 (XFLOADf32 xoaddr:$src))>;
+def : Pat<(f64 (any_fpextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Additional fnmsub patterns for the PPC-specific ISD opcode
+def : Pat<(PPCfnmsub f32:$A, f32:$B, f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f32:$A, f32:$B, f32:$C)),
+ (XSMSUBASP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f32:$A, f32:$B, (fneg f32:$C)),
+ (XSNMADDASP $C, $A, $B)>;
+
+// f32 neg
+// Although XSNEGDP is available on P7, we only select it starting from P8,
+// so that FNMSUBS can be selected for the fneg-fmsub pattern on P7. (The VSX
+// version, XSNMSUBASP, is only available from P8.)
+def : Pat<(f32 (fneg f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSNEGDP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+
+// Instructions for converting float to i32 feeding a store.
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
+ (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
+ (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
+ (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
+ (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+} // HasVSX, HasP8Vector
+
+// Big endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+
+// v4f32 scalar <-> vector conversions (BE)
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
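
As a rough C analogue (illustrative only), the two load forms differ only in how the loaded word is widened before it is placed in the vector:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint32_t mem = 0x80000001u;               /* word in memory          */
  int64_t  sx  = (int32_t)mem;              /* sextloadi32, like LIWAX */
  uint64_t zx  = mem;                       /* zextloadi32, like LIWZX */
  printf("%016llx\n%016llx\n",
         (unsigned long long)sx, (unsigned long long)zx);
  return 0;
}
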
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a BE system are in order <0, 1, 2, 3>.
+// The store instructions store the second word from the left.
+// So to align element zero, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 2 and 3.
+foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+}
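
A quick C check of the pairs used above (illustrative only): after XXSLDWI $A,$A,sh, word w of the result holds original lane (w + sh) mod 4, and STIWX stores word 1, so each [elem, shift] pair must satisfy (1 + shift) mod 4 == elem.

#include <stdio.h>

int main(void) {
  int pairs[3][2] = {{0, 3}, {2, 1}, {3, 2}};  /* [elem, shift] */
  for (int i = 0; i < 3; ++i) {
    int elem = pairs[i][0], sh = pairs[i][1];
    printf("elem %d shift %d -> stored lane %d\n", elem, sh, (1 + sh) % 4);
  }
  return 0;
}
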
+} // HasVSX, HasP8Vector, IsBigEndian
+
+// Little endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
-// Put this P9Altivec related definition here since it's possible to be
-// selected to VSX instruction xvnegsp, avoid possible undef.
-let Predicates = [HasP9Altivec] in {
+// v4f32 scalar <-> vector conversions (LE)
+// The permuted version is no better than the version that puts the value
+// into the right element because XSCVDPSPN is different from all the other
+// instructions used for PPCSToV.
+defm : ScalToVecWPermute<v4f32, (f32 f32:$A),
+ (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1),
+ (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 3)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+defm : ScalToVecWPermute<
+ v2i64, (i64 (sextloadi32 xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v2i64, (i64 (zextloadi32 xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v4i32, (i32 (load xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+defm : ScalToVecWPermute<
+ v4f32, (f32 (load xoaddr:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a LE system are in order <3, 2, 1, 0>.
+// The store instructions store the second word from the left.
+// So to align element 3, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 0 and 1.
+foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+}
+} // HasVSX, HasP8Vector, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian
+
+// Any VSX target with direct moves.
+let Predicates = [HasVSX, HasDirectMove] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert single to the 64-bit fp register
+// format)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
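
The end-to-end semantics of these two patterns is a plain bit reinterpretation; the XSCVDPSPN/XXSLDWI steps only line the single-precision bits up with the word that MFVSRWZ reads. A portable C sketch (illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  float f = 1.0f;
  uint32_t bits;
  memcpy(&bits, &f, sizeof bits);    /* i32 (bitconvert f32) */
  printf("%08x\n", bits);            /* prints 3f800000      */
  float back;
  memcpy(&back, &bits, sizeof back); /* f32 (bitconvert i32) */
  printf("%g\n", back);
  return 0;
}
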
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
- (v4i32 (VABSDUW $A, $B))>;
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
- def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
- (v8i16 (VABSDUH $A, $B))>;
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
- def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
- (v16i8 (VABSDUB $A, $B))>;
+// Rounding to integer.
+def : Pat<(i64 (lrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (lrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (llrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (llrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (lround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (lround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+def : Pat<(i64 (llround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (llround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
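
The split between the two groups mirrors the C library semantics: lrint/llrint honor the current rounding mode, so a bare FCTID suffices, while lround/llround round ties away from zero, which is what the extra XSRDPI (round to nearest, ties away) provides. A short C illustration (illustrative only; results assume the default round-to-nearest-even mode):

#include <math.h>
#include <stdio.h>

int main(void) {
  printf("%ld %ld\n", lrint(2.5),  lround(2.5));  /* 2  3  */
  printf("%ld %ld\n", lrint(-2.5), lround(-2.5)); /* -2 -3 */
  return 0;
}
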
+
+// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
+// of f64
+def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+
+// Endianness-neutral constant splat on P8 and newer targets. The reason
+// for this pattern is that on targets with direct moves, we don't expand
+// BUILD_VECTOR nodes for v4i32.
+def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
+ immSExt5NonZero:$A, immSExt5NonZero:$A)),
+ (v4i32 (VSPLTISW imm:$A))>;
+} // HasVSX, HasDirectMove
+
+// Big endian VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in {
+// v16i8 scalar <-> vector conversions (BE)
+def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+
+// v2i64 scalar <-> vector conversions (BE)
+def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in {
+ // v16i8 scalar <-> vector conversions (LE)
+ defm : ScalToVecWPermute<v16i8, (i32 i32:$A),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
+ defm : ScalToVecWPermute<v8i16, (i32 i32:$A),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC),
+ (COPY_TO_REGCLASS MovesToVSR.LE_WORD_1, VSRC)>;
+ defm : ScalToVecWPermute<v4i32, (i32 i32:$A), MovesToVSR.LE_WORD_0,
+ (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
+ defm : ScalToVecWPermute<v2i64, (i64 i64:$A), MovesToVSR.LE_DWORD_0,
+ MovesToVSR.LE_DWORD_1>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsLittleEndian
+
+// Big endian pre-P9 VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+// v8i16 scalar <-> vector conversions (BE)
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+// v4i32 scalar <-> vector conversions (BE)
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian
+
+// Little endian pre-P9 VSX subtarget with direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+// v8i16 scalar <-> vector conversions (LE)
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+// v4i32 scalar <-> vector conversions (LE)
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian integer vectors using direct moves.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
+// Little endian integer vectors using direct moves.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP9Vector] in {
+// Additional fnmsub pattern for PPC specific ISD opcode
+def : Pat<(PPCfnmsub f128:$A, f128:$B, f128:$C),
+ (XSNMSUBQP $C, $A, $B)>;
+def : Pat<(fneg (PPCfnmsub f128:$A, f128:$B, f128:$C)),
+ (XSMSUBQP $C, $A, $B)>;
+def : Pat<(PPCfnmsub f128:$A, f128:$B, (fneg f128:$C)),
+ (XSNMADDQP $C, $A, $B)>;
+
+def : Pat<(f128 (sint_to_fp i64:$src)),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP $src))>;
+def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
+def : Pat<(f128 (uint_to_fp i64:$src)),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP $src))>;
+
+// Convert (Un)Signed Word -> QP.
+def : Pat<(f128 (sint_to_fp i32:$src)),
+ (f128 (XSCVSDQP (MTVSRWA $src)))>;
+def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
+def : Pat<(f128 (uint_to_fp i32:$src)),
+ (f128 (XSCVUDQP (MTVSRWZ $src)))>;
+def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
+
+// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
+// separate pattern so that it can convert the input register class from
+// VRRC(v8i16) to VSRC.
+def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
+ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
+
+// Use current rounding mode
+def : Pat<(f128 (any_fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
+// Round to nearest, ties away from zero
+def : Pat<(f128 (any_fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
+// Round towards Zero
+def : Pat<(f128 (any_ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
+// Round towards +Inf
+def : Pat<(f128 (any_fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
+// Round towards -Inf
+def : Pat<(f128 (any_ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
+// Use current rounding mode and signal Inexact (XSRQPIX)
+def : Pat<(f128 (any_frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
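+// A hedged illustration of the rounding patterns above, assuming glibc
+// style _Float128 math functions that lower to the corresponding ISD
+// nodes (an assumption, not part of this patch):
+//   __float128 t = truncf128(x);  // expected to select xsrqpi 1, vB, 1
+//   __float128 c = ceilf128(x);   // expected to select xsrqpi 1, vB, 2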
+
+def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
+def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
+// Extra patterns expanding to vector Extract Word/Insert Word
+def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
+ (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
+def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
+
+// Vector Reverse
+def : Pat<(v8i16 (bswap v8i16 :$A)),
+ (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+def : Pat<(v1i128 (bswap v1i128 :$A)),
+ (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+
+// D-Form Load/Store
+def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
+ (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
+
+def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
+ (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
+def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
+ (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+
+// Build vectors from i8 loads
+defm : ScalToVecWPermute<v16i8, ScalarLoads.Li8,
+ (VSPLTBs 7, (LXSIBZX xoaddr:$src)),
+ (VSPLTBs 7, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v8i16, ScalarLoads.ZELi8,
+ (VSPLTHs 3, (LXSIBZX xoaddr:$src)),
+ (VSPLTHs 3, (LXSIBZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi8,
+ (XXSPLTWs (LXSIBZX xoaddr:$src), 1),
+ (XXSPLTWs (LXSIBZX xoaddr:$src), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi8i64,
+ (XXPERMDIs (LXSIBZX xoaddr:$src), 0),
+ (XXPERMDIs (LXSIBZX xoaddr:$src), 0)>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi8,
+ (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1),
+ (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi8i64,
+ (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0),
+ (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0)>;
+
+// Build vectors from i16 loads
+defm : ScalToVecWPermute<v8i16, ScalarLoads.Li16,
+ (VSPLTHs 3, (LXSIHZX xoaddr:$src)),
+ (VSPLTHs 3, (LXSIHZX xoaddr:$src))>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.ZELi16,
+ (XXSPLTWs (LXSIHZX xoaddr:$src), 1),
+ (XXSPLTWs (LXSIHZX xoaddr:$src), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.ZELi16i64,
+ (XXPERMDIs (LXSIHZX xoaddr:$src), 0),
+ (XXPERMDIs (LXSIHZX xoaddr:$src), 0)>;
+defm : ScalToVecWPermute<v4i32, ScalarLoads.SELi16,
+ (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1),
+ (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1)>;
+defm : ScalToVecWPermute<v2i64, ScalarLoads.SELi16i64,
+ (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0),
+ (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0)>;
+
+// Load/convert and convert/store patterns for f16.
+def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+def : Pat<(f64 (f16_to_fp i32:$A)),
+ (f64 (XSCVHPDP (MTVSRWZ $A)))>;
+def : Pat<(f32 (f16_to_fp i32:$A)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
+def : Pat<(i32 (fp_to_f16 f32:$A)),
+ (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
+def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
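+// A hedged illustration of the f16 patterns above, assuming a storage-only
+// half-precision type (hypothetical C, not part of this patch):
+//   __fp16 *p; double d = *p;     // extloadf16: lxsihzx + xscvhpdp
+//   *p = (__fp16)d;               // truncstoref16: xscvdphp + stxsihx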
+
+// Vector sign extensions
+def : Pat<(f64 (PPCVexts f64:$A, 1)),
+ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
+def : Pat<(f64 (PPCVexts f64:$A, 2)),
+ (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+
+def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+ (f32 (DFLOADf32 iaddrX4:$src))>;
+
+def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
+
+// Convert (Un)Signed DWord in memory -> QP
+def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
+
+// Convert Unsigned HWord in memory -> QP
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
+ (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
+
+// Convert Unsigned Byte in memory -> QP
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
+ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
+
+// Truncate & Convert QP -> (Un)Signed (D)Word.
+def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+def : Pat<(i32 (fp_to_sint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
+def : Pat<(i32 (fp_to_uint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
+
+// Instructions for store(fptosi).
+// The 8-byte version is repeated here due to the availability of the
+// D-Form STXSD.
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+
+// Instructions for store(fptoui).
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+// Round & Convert QP -> DP/SP
+def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>;
+def : Pat<(f32 (any_fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
+
+// Convert SP -> QP
+def : Pat<(f128 (any_fpextend f32:$src)),
+ (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+ (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+
+// Endianness-neutral patterns for const splats with ISA 3.0 instructions.
+defm : ScalToVecWPermute<v4i32, (i32 i32:$A), (MTVSRWS $A), (MTVSRWS $A)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
+ (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
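+// A hedged illustration (hypothetical C with AltiVec extensions): a byte
+// splat such as
+//   vector signed char v = vec_splats((signed char)5);
+// is expected to match the build_vector pattern above and select a single
+// xxspltib, for any byte immediate other than all-ones per the predicate.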
+defm : ScalToVecWPermute<v4i32, FltToIntLoad.A,
+ (XVCVSPSXWS (LXVWSX xoaddr:$A)),
+ (XVCVSPSXWS (LXVWSX xoaddr:$A))>;
+defm : ScalToVecWPermute<v4i32, FltToUIntLoad.A,
+ (XVCVSPUXWS (LXVWSX xoaddr:$A)),
+ (XVCVSPUXWS (LXVWSX xoaddr:$A))>;
+defm : ScalToVecWPermute<
+ v4i32, DblToIntLoadP9.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+ (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
+defm : ScalToVecWPermute<
+ v4i32, DblToUIntLoadP9.A,
+ (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+ (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToLongLoadP9.A,
+ (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
+ (SUBREG_TO_REG
+ (i64 1),
+ (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, FltToULongLoadP9.A,
+ (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0),
+ (SUBREG_TO_REG
+ (i64 1),
+ (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), sub_64)>;
+def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
+ (v4f32 (LXVWSX xoaddr:$A))>;
+def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
+ (v4i32 (LXVWSX xoaddr:$A))>;
+} // HasVSX, HasP9Vector
+
+// Big endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
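+// Note: in the BE patterns above, word element i sits at byte offset 4*i
+// for XXEXTRACTUW; the little endian block below uses the mirrored
+// offsets 12 - 4*i.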
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+
+// Scalar stores of i8
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+
+// (Un)Signed Word vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
+foreach Idx = [0,2,3] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
+}
+foreach Idx = 0-3 in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
+}
+
+// (Un)Signed HWord vector extract -> QP
+foreach Idx = 0-7 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, Idx), i16)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+ sub_64)))>;
+ // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+}
+
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = 0-15 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+ i8)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+}
+
+// Unsigned int in VSX register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
+} // HasVSX, HasP9Vector, IsBigEndian
+
+// Little endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i8
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load iaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2i64, (i64 (load xaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load iaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v2f64, (f64 (load xaddrX4:$src)),
+ (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+ (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
+
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+// (Un)Signed Word vector extract -> QP
+foreach Idx = [[0,3],[1,2],[3,0]] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
+ sub_64)))>;
+}
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- // As PPCVABSD description, the last operand indicates whether do the
- // sign bit flip.
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
- (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
}
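+// In the uint_to_fp foreach above, each pair is [vector element index,
+// byte offset of that word for XXEXTRACTUW] in the little endian layout.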
+
+// (Un)Signed HWord vector extract -> QP
+// The nested foreach lists identify the vector element and the
+// corresponding register byte location.
+foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, !head(Idx)), i16)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (VEXTSH2D
+ (VEXTRACTUH !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+ 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+}
+
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
+ [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v16i8:$src, !head(Idx)), i8)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+ 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG
+ (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+}
+
+// Unsigned int in VSX register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
+} // HasVSX, HasP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget that supports Power9 Altivec.
+let Predicates = [HasVSX, HasP9Altivec] in {
+// This P9 Altivec related definition is placed here since it may be
+// selected into the VSX instruction xvnegsp, avoiding a possible undef.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+// As described for PPCVABSD, the last operand indicates whether to flip
+// the sign bit.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
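+// Flipping the sign bit of each word (xvnegsp toggles the sign bit of
+// every single-precision element) biases signed inputs into the unsigned
+// domain, so the unsigned absolute-difference instruction yields the
+// signed result.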
+} // HasVSX, HasP9Altivec
+
+// Big endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBLX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHLX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHLX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHLX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHLX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHLX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHLX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHLX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHLX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWLX (LI8 0), $S)>;
+
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWLX (LI8 8), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWLX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWLX (LI8 0), $S))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWLX (LI8 8), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWLX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// They are added to PPCInstrVSX.td rather than PPCInstrAltivec.td so they
+// can compete with the complexities of the existing build vector patterns
+// in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsBigEndian
+
+// Little endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBRX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHRX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHRX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHRX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHRX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHRX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHRX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHRX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHRX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWRX (LI8 0), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWRX (LI8 4), $S)>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWRX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWRX (LI8 0), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWRX (LI8 4), $S))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWRX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// They are added to PPCInstrVSX.td rather than PPCInstrAltivec.td so they
+// can compete with the complexities of the existing build vector patterns
+// in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsLittleEndian
+
+// Big endian VSX subtarget with the additional direct moves introduced
+// in ISA 3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 1)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Big endian.
+def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
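+// In the expansion above, each RLDIMI packs two 32-bit values into one
+// doubleword (the rotated-in operand lands in the high 32 bits), and
+// MTVSRDD then moves both doublewords into the VSR with a single
+// direct-move instruction.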
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget with the additional direct moves introduced
+// in ISA 3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 0)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Little endian.
+def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian
+} // AddedComplexity = 400
+
+//---------------------------- Instruction aliases ---------------------------//
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrd $XT, $rA",
+ (MTVRD vrrc:$XT, g8rc:$rA), 0>;
+def : InstAlias<"mtfprd $dst, $rA",
+ (MTVSRD f8rc:$dst, g8rc:$rA)>;
+def : InstAlias<"mfvrwz $rA, $XT",
+ (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprwz $rA, $src",
+ (MFVSRWZ gprc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrwa $XT, $rA",
+ (MTVRWA vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwa $dst, $rA",
+ (MTVSRWA f8rc:$dst, gprc:$rA)>;
+def : InstAlias<"mtvrwz $XT, $rA",
+ (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwz $dst, $rA",
+ (MTVSRWZ f8rc:$dst, gprc:$rA)>;
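
The xxspltd, xxswapd, xxmrghd and xxmrgld aliases above all encode into
XXPERMDI's 2-bit doubleword-select immediate. A minimal C++ model of that
mapping (illustrative only, not part of the patch):

#include <cstdint>

struct V2x64 { uint64_t dw[2]; };

// XXPERMDI XT,XA,XB,DM: the high bit of DM selects XT's doubleword 0 from
// XA, the low bit selects XT's doubleword 1 from XB.
V2x64 xxpermdi(const V2x64 &A, const V2x64 &B, unsigned DM) {
  return {{A.dw[(DM >> 1) & 1], B.dw[DM & 1]}};
}

// With A == B: DM=0 splats doubleword 0 ("xxspltd ..., 0"), DM=3 splats
// doubleword 1 ("xxspltd ..., 1"), and DM=2 swaps the doublewords
// ("xxswapd").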
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index b761f337533b5..a7546d2be5d83 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -53,7 +53,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -74,6 +73,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
#include <iterator>
#include <utility>
@@ -244,10 +244,10 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
-static const std::string PHINodeNameSuffix = ".phi";
-static const std::string CastNodeNameSuffix = ".cast";
-static const std::string GEPNodeIncNameSuffix = ".inc";
-static const std::string GEPNodeOffNameSuffix = ".off";
+static constexpr StringRef PHINodeNameSuffix = ".phi";
+static constexpr StringRef CastNodeNameSuffix = ".cast";
+static constexpr StringRef GEPNodeIncNameSuffix = ".inc";
+static constexpr StringRef GEPNodeOffNameSuffix = ".off";
FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {
return new PPCLoopInstrFormPrep(TM);
@@ -263,7 +263,7 @@ static bool IsPtrInBounds(Value *BasePtr) {
return false;
}
-static std::string getInstrName(const Value *I, const std::string Suffix) {
+static std::string getInstrName(const Value *I, StringRef Suffix) {
assert(I && "Invalid paramater!");
if (I->hasName())
return (I->getName() + Suffix).str();
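
The switch from static const std::string to constexpr StringRef avoids
global constructors at load time, and taking StringRef by value avoids a
std::string copy per call. A small sketch of the resulting concatenation
pattern (assumes LLVM's StringRef and Twine headers):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <string>

static constexpr llvm::StringRef Suffix = ".phi";

// Twine defers the concatenation, so exactly one std::string is allocated.
std::string makeName(llvm::StringRef Base) {
  return (Base + Suffix).str(); // e.g. "i" -> "i.phi"
}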
diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
index 83cca11b27a37..2b0e604e0ccde 100644
--- a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -54,6 +54,7 @@ private:
static StringRef getCPUSuffix(const PPCSubtarget *Subtarget);
static std::string createMASSVFuncName(Function &Func,
const PPCSubtarget *Subtarget);
+ bool handlePowSpecialCases(CallInst *CI, Function &Func, Module &M);
bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M,
const PPCSubtarget *Subtarget);
};
@@ -96,6 +97,34 @@ PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
return MASSVEntryName;
}
+/// If the proper fast-math flags are present, this function replaces the
+/// MASSV pow call with the llvm.pow intrinsic when the exponent is 0.25 or
+/// 0.75.
+bool PPCLowerMASSVEntries::handlePowSpecialCases(CallInst *CI, Function &Func,
+ Module &M) {
+ if (Func.getName() != "__powf4_massv" && Func.getName() != "__powd2_massv")
+ return false;
+
+ if (Constant *Exp = dyn_cast<Constant>(CI->getArgOperand(1)))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(Exp->getSplatValue())) {
+      // If the exponent is 0.75 or 0.25, it is cheaper to turn the call into
+      // the pow intrinsic so that it can be optimized as a sequence of sqrts.
+ if (!CI->hasNoInfs() || !CI->hasApproxFunc())
+ return false;
+
+ if (!CFP->isExactlyValue(0.75) && !CFP->isExactlyValue(0.25))
+ return false;
+
+ if (CFP->isExactlyValue(0.25) && !CI->hasNoSignedZeros())
+ return false;
+
+ CI->setCalledFunction(
+ Intrinsic::getDeclaration(&M, Intrinsic::pow, CI->getType()));
+ return true;
+ }
+
+ return false;
+}
+
/// Lowers generic MASSV entries to PowerPC subtarget-specific MASSV entries.
/// e.g.: __sind2_massv --> __sind2_P9 for a Power9 subtarget.
/// Both function prototypes and their callsites are updated during lowering.
@@ -105,11 +134,14 @@ bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
if (CI->use_empty())
return false;
+  // Handle pow(x, 0.25), pow(x, 0.75), powf(x, 0.25) and powf(x, 0.75) as
+  // special cases.
+ if (handlePowSpecialCases(CI, Func, M))
+ return true;
+
std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
FunctionCallee FCache = M.getOrInsertFunction(
MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
- CallSite CS(CI);
CI->setCalledFunction(FCache);
return true;
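
The exponent special cases above are profitable because, under the checked
fast-math flags, pow with these exponents decomposes into square roots,
which are far cheaper than a full pow expansion. A sketch of the algebra
the later optimization relies on:

#include <cmath>

// x^(1/4) = sqrt(sqrt(x))
double pow025(double X) { return std::sqrt(std::sqrt(X)); }

// x^(3/4) = x^(1/2) * x^(1/4) = sqrt(x) * sqrt(sqrt(x))
double pow075(double X) { return std::sqrt(X) * std::sqrt(std::sqrt(X)); }

The no-infs/approx-func (and, for 0.25, no-signed-zeros) requirements gate
the transform because the sqrt expansion does not preserve IEEE semantics
for those inputs.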
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index b6496f189a3ae..236f98f32e18e 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -29,10 +29,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
-static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
- return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
-}
-
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
AsmPrinter &AP) {
const TargetMachine &TM = AP.TM;
@@ -41,13 +37,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
MCContext &Ctx = AP.OutContext;
SmallString<128> Name;
- StringRef Suffix;
- if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
- Suffix = "$non_lazy_ptr";
-
- if (!Suffix.empty())
- Name += DL.getPrivateGlobalPrefix();
-
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
@@ -56,30 +45,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
TM.getNameWithPrefix(Name, GV, Mang);
}
- Name += Suffix;
MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
- // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
- // then add the suffix.
- if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
- MachineModuleInfoMachO &MachO = getMachOMMI(AP);
-
- MachineModuleInfoImpl::StubValueTy &StubSym = MachO.getGVStubEntry(Sym);
-
- if (!StubSym.getPointer()) {
- assert(MO.isGlobal() && "Extern symbol not handled yet");
- StubSym = MachineModuleInfoImpl::
- StubValueTy(AP.getSymbol(MO.getGlobal()),
- !MO.getGlobal()->hasInternalLinkage());
- }
- return Sym;
- }
-
return Sym;
}
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
- AsmPrinter &Printer, bool IsDarwin) {
+ AsmPrinter &Printer) {
MCContext &Ctx = Printer.OutContext;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
@@ -106,13 +78,30 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
}
- if (MO.getTargetFlags() == PPCII::MO_PLT)
+ if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
+ else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PCREL;
+ else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG))
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
- const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const MachineInstr *MI = MO.getParent();
+ const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;
+
+ unsigned MIOpcode = MI->getOpcode();
+ assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
+ "BL8_NOTOC is only valid when using PC Relative Calls.");
+ if (Subtarget->isUsingPCRelativeCalls()) {
+ if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
+ MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
+ MIOpcode == PPC::BL8_NOTOC) {
+ RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
+ }
+ }
+
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
// If -msecure-plt -fPIC, add 32768 to symbol.
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
@@ -137,10 +126,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
// Add ha16() / lo16() markers if required.
switch (access) {
case PPCII::MO_LO:
- Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx);
+ Expr = PPCMCExpr::createLo(Expr, Ctx);
break;
case PPCII::MO_HA:
- Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx);
+ Expr = PPCMCExpr::createHa(Expr, Ctx);
break;
}
@@ -148,20 +137,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}
void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP, bool IsDarwin) {
+ AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP,
- IsDarwin))
+ if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP))
OutMI.addOperand(MCOp);
}
}
bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &OutMO, AsmPrinter &AP,
- bool IsDarwin) {
+ MCOperand &OutMO, AsmPrinter &AP) {
switch (MO.getType()) {
default:
llvm_unreachable("unknown operand type");
@@ -170,6 +157,9 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
assert(MO.getReg() > PPC::NoRegister &&
MO.getReg() < PPC::NUM_TARGET_REGS &&
"Invalid register for this target!");
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return false;
OutMO = MCOperand::createReg(MO.getReg());
return true;
case MachineOperand::MO_Immediate:
@@ -181,20 +171,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
return true;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
- OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP);
return true;
case MachineOperand::MO_JumpTableIndex:
- OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
return true;
case MachineOperand::MO_ConstantPoolIndex:
- OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
return true;
case MachineOperand::MO_BlockAddress:
- OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP,
- IsDarwin);
+ OutMO =
+ GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP);
return true;
case MachineOperand::MO_MCSymbol:
- OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin);
+ OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP);
return true;
case MachineOperand::MO_RegisterMask:
return false;
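
One detail worth noting in GetSymbolRef above: under -msecure-plt with
-fPIC the symbol expression is biased by 32768, so that the later @ha/@lo
split keeps the low 16-bit half in signed range. A sketch of that
construction (assumes an MCContext Ctx and the symbol expression Expr, as
in the function above):

const MCExpr *Biased =
    MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx);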
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 74192cb20cd05..d2aba6bd6e8de 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
"Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
"Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumLoadImmZeroFoldedAndRemoved,
+ "Number of LI(8) reg, 0 that are folded to r0 and removed");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -124,9 +126,14 @@ public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+ // At this point, TOC pointer should not be used in a function that uses
+ // PC-Relative addressing.
+ assert((MF.getRegInfo().use_empty(PPC::X2) ||
+ !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
+ "TOC pointer used in a function using PC-Relative addressing!");
if (skipFunction(MF.getFunction()))
return false;
- initialize(MF);
return simplifyCode();
}
};
@@ -314,7 +321,22 @@ bool PPCMIPeephole::simplifyCode(void) {
default:
break;
-
+ case PPC::LI:
+ case PPC::LI8: {
+ // If we are materializing a zero, look for any use operands for which
+ // zero means immediate zero. All such operands can be replaced with
+ // PPC::ZERO.
+ if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
+ break;
+ unsigned MIDestReg = MI.getOperand(0).getReg();
+ for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
+ Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
+ if (MRI->use_nodbg_empty(MIDestReg)) {
+ ++NumLoadImmZeroFoldedAndRemoved;
+ ToErase = &MI;
+ }
+ break;
+ }
case PPC::STD: {
MachineFrameInfo &MFI = MF->getFrameInfo();
if (MFI.hasVarSizedObjects() ||
@@ -541,10 +563,12 @@ bool PPCMIPeephole::simplifyCode(void) {
if (!P1 || !P2)
break;
- // Remove the passed FRSP instruction if it only feeds this MI and
- // set any uses of that FRSP (in this MI) to the source of the FRSP.
+ // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
+ // and set any uses of that FRSP/XSRSP (in this MI) to the source of
+ // the FRSP/XSRSP.
auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
- if (RoundInstr->getOpcode() == PPC::FRSP &&
+ unsigned Opc = RoundInstr->getOpcode();
+ if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
Simplified = true;
Register ConvReg1 = RoundInstr->getOperand(1).getReg();
@@ -554,7 +578,7 @@ bool PPCMIPeephole::simplifyCode(void) {
if (Use.getOperand(i).isReg() &&
Use.getOperand(i).getReg() == FRSPDefines)
Use.getOperand(i).setReg(ConvReg1);
- LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
+ LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
LLVM_DEBUG(RoundInstr->dump());
LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
LLVM_DEBUG(MI.dump());
@@ -882,11 +906,6 @@ bool PPCMIPeephole::simplifyCode(void) {
       // while in the PowerPC ISA the lowest bit is at index 63.
APInt MaskSrc =
APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
- // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
- // equal to highBit.
- // If MBSrc - MESrc == 1, we expect a full set mask instead of Null.
- if (SrcMaskFull && (MBSrc - MESrc == 1))
- MaskSrc.setAllBits();
APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
APInt FinalMask = RotatedSrcMask & MaskMI;
@@ -1540,6 +1559,12 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "To: ");
LLVM_DEBUG(MI.dump());
NumRotatesCollapsed++;
+ // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
+ if (MRI->use_nodbg_empty(SrcReg)) {
+ assert(!SrcMI->hasImplicitDef() &&
+ "Not expecting an implicit def with this instr.");
+ SrcMI->eraseFromParent();
+ }
return true;
}
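
As the comment in the rotate-combining code above notes, the PowerPC ISA
numbers bits from the most-significant end, and an rlwinm-style MB/ME mask
wraps around when MB > ME. An illustrative scalar model of the mask that
getBitsSetWithWrap computes after the 32 - x index conversion:

#include <cstdint>

uint32_t maskMBME(unsigned MB, unsigned ME) {
  uint32_t Mask = 0;
  for (unsigned I = MB;; I = (I + 1) & 31) {
    Mask |= UINT32_C(1) << (31 - I); // IBM bit I is LSB-relative bit 31 - I
    if (I == ME)
      break;
  }
  return Mask; // e.g. MB=28, ME=3 wraps around: 0xF000000F
}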
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 2f65d6a2855be..daf88589bb52b 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -10,48 +10,55 @@
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool> PPCDisableNonVolatileCR(
+ "ppc-disable-non-volatile-cr",
+ cl::desc("Disable the use of non-volatile CR register fields"),
+ cl::init(false), cl::Hidden);
void PPCFunctionInfo::anchor() {}
+PPCFunctionInfo::PPCFunctionInfo(const MachineFunction &MF)
+ : DisableNonVolatileCR(PPCDisableNonVolatileCR) {}
-MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
Twine(MF.getFunctionNumber()) +
"$poff");
}
-MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const {
+MCSymbol *PPCFunctionInfo::getGlobalEPSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_gep" +
Twine(MF.getFunctionNumber()));
}
-MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const {
+MCSymbol *PPCFunctionInfo::getLocalEPSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_lep" +
Twine(MF.getFunctionNumber()));
}
-MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const {
+MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"func_toc" +
Twine(MF.getFunctionNumber()));
}
-bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const {
- for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+bool PPCFunctionInfo::isLiveInSExt(Register VReg) const {
+ for (const std::pair<Register, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
if (LiveIn.first == VReg)
return LiveIn.second.isSExt();
return false;
}
-bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const {
- for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+bool PPCFunctionInfo::isLiveInZExt(Register VReg) const {
+ for (const std::pair<Register, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
if (LiveIn.first == VReg)
return LiveIn.second.isZExt();
return false;
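
For orientation, with ELF's ".L" private-global prefix and a hypothetical
function number of 7, the accessors above produce symbols such as
".Lfunc_gep7" (global entry point), ".Lfunc_lep7" (local entry point),
".Lfunc_toc7" and ".L7$poff". A one-line sketch of the same Twine pattern:

#include "llvm/ADT/Twine.h"
#include <string>

std::string GEPName = (llvm::Twine(".L") + "func_gep" + llvm::Twine(7)).str();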
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 2b341b5952c80..29ca53e273d75 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -65,6 +65,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR = false;
+  /// DisableNonVolatileCR - Indicates whether the use of non-volatile CR
+  /// fields is disabled for this function.
+ bool DisableNonVolatileCR = false;
+
/// Indicates whether VRSAVE is spilled in the current function.
bool SpillsVRSAVE = false;
@@ -112,24 +116,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// If any of CR[2-4] need to be saved in the prologue and restored in the
/// epilogue then they are added to this array. This is used for the
/// 64-bit SVR4 ABI.
- SmallVector<unsigned, 3> MustSaveCRs;
-
- /// Hold onto our MachineFunction context.
- MachineFunction &MF;
+ SmallVector<Register, 3> MustSaveCRs;
/// Whether this uses the PIC Base register or not.
bool UsesPICBase = false;
- /// True if this function has a subset of CSRs that is handled explicitly via
- /// copies
- bool IsSplitCSR = false;
-
/// We keep track attributes for each live-in virtual registers
/// to use SExt/ZExt flags in later optimization.
- std::vector<std::pair<unsigned, ISD::ArgFlagsTy>> LiveInAttrs;
+ std::vector<std::pair<Register, ISD::ArgFlagsTy>> LiveInAttrs;
public:
- explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit PPCFunctionInfo(const MachineFunction &MF);
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -175,6 +172,9 @@ public:
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setDisableNonVolatileCR() { DisableNonVolatileCR = true; }
+ bool isNonVolatileCRDisabled() const { return DisableNonVolatileCR; }
+
void setSpillsVRSAVE() { SpillsVRSAVE = true; }
bool isVRSAVESpilled() const { return SpillsVRSAVE; }
@@ -200,36 +200,33 @@ public:
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
/// This function associates attributes for each live-in virtual register.
- void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) {
+ void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags) {
LiveInAttrs.push_back(std::make_pair(VReg, Flags));
}
/// This function returns true if the specified vreg is
/// a live-in register and sign-extended.
- bool isLiveInSExt(unsigned VReg) const;
+ bool isLiveInSExt(Register VReg) const;
/// This function returns true if the specified vreg is
/// a live-in register and zero-extended.
- bool isLiveInZExt(unsigned VReg) const;
+ bool isLiveInZExt(Register VReg) const;
int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
- const SmallVectorImpl<unsigned> &
+ const SmallVectorImpl<Register> &
getMustSaveCRs() const { return MustSaveCRs; }
- void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+ void addMustSaveCR(Register Reg) { MustSaveCRs.push_back(Reg); }
void setUsesPICBase(bool uses) { UsesPICBase = uses; }
bool usesPICBase() const { return UsesPICBase; }
- bool isSplitCSR() const { return IsSplitCSR; }
- void setIsSplitCSR(bool s) { IsSplitCSR = s; }
-
- MCSymbol *getPICOffsetSymbol() const;
+ MCSymbol *getPICOffsetSymbol(MachineFunction &MF) const;
- MCSymbol *getGlobalEPSymbol() const;
- MCSymbol *getLocalEPSymbol() const;
- MCSymbol *getTOCOffsetSymbol() const;
+ MCSymbol *getGlobalEPSymbol(MachineFunction &MF) const;
+ MCSymbol *getLocalEPSymbol(MachineFunction &MF) const;
+ MCSymbol *getTOCOffsetSymbol(MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index a38c8f475066e..5649d7d13966d 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -15,6 +15,16 @@ static cl::opt<bool>
DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
cl::desc("Disable scheduling addi instruction before"
"load for ppc"), cl::Hidden);
+static cl::opt<bool>
+ EnableAddiHeuristic("ppc-postra-bias-addi",
+ cl::desc("Enable scheduling addi instruction as early"
+ "as possible post ra"),
+ cl::Hidden, cl::init(true));
+
+static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
+ return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
+ Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
+}
bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
@@ -22,19 +32,13 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
if (DisableAddiLoadHeuristic)
return false;
- auto isADDIInstr = [&] (const MachineInstr &Inst) {
- return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
- };
-
SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
- if (isADDIInstr(*FirstCand.SU->getInstr()) &&
- SecondCand.SU->getInstr()->mayLoad()) {
+ if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
TryCand.Reason = Stall;
return true;
}
- if (FirstCand.SU->getInstr()->mayLoad() &&
- isADDIInstr(*SecondCand.SU->getInstr())) {
+ if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
TryCand.Reason = NoCand;
return true;
}
@@ -61,6 +65,38 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
return;
}
+bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) const {
+ if (!EnableAddiHeuristic)
+ return false;
+
+ if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
+ TryCand.Reason = Stall;
+ return true;
+ }
+ return false;
+}
+
+void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+ PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+ if (!Cand.isValid())
+ return;
+
+  // Apply the PowerPC post-RA-specific heuristic only when TryCand isn't
+  // selected, or is selected merely by node order.
+ if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+ return;
+
+  // There are some benefits to scheduling the ADDI as early as possible
+  // post-RA, to avoid being stalled by vector instructions that occupy all
+  // the hardware units. ADDI is also commonly used to post-increment the
+  // loop induction variable, which matters for performance.
+ if (biasAddiCandidate(Cand, TryCand))
+ return;
+}
+
void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
// Custom PPC PostRA specific behavior here.
PostGenericScheduler::enterMBB(MBB);
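
The post-RA ADDI bias is easiest to see on a vectorized loop: the
induction-variable post-increment lowers to an ADDI feeding the backedge,
and issuing it early keeps it from queueing behind instructions that
saturate the vector units. An illustrative source shape (hypothetical
example, not taken from the patch):

void axpy(double *A, const double *B, const double *C, long N) {
  for (long I = 0; I < N; ++I) // ++I lowers to an ADDI feeding the backedge
    A[I] = B[I] * C[I];        // the vector body occupies the FP/vector pipes
}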
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
index 93532d9545a6e..a9734ca71859a 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -42,6 +42,9 @@ protected:
SUnit *pickNode(bool &IsTopNode) override;
void enterMBB(MachineBasicBlock *MBB) override;
void leaveMBB() override;
+
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+ bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
new file mode 100644
index 0000000000000..815dfd1402f47
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -0,0 +1,203 @@
+//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+namespace {
+
+class FusionFeature {
+public:
+ typedef SmallDenseSet<unsigned> FusionOpSet;
+
+ enum FusionKind {
+ #define FUSION_KIND(KIND) FK_##KIND
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \
+ FUSION_KIND(KIND),
+ #include "PPCMacroFusion.def"
+ FUSION_KIND(END)
+ };
+private:
+  // Each fusion feature is assigned one fusion kind. All instruction pairs
+  // with the same fusion kind share the same fusion characteristics.
+ FusionKind Kd;
+ // True if this feature is enabled.
+ bool Supported;
+  // e.g. for the pair
+  //   li   rx, si
+  //   load rt, ra, rx
+  // this is the index of the dependent operand in the second op (the load).
+  // A negative value means the dependency may be on any operand.
+ int DepOpIdx;
+ // The first fusion op set.
+ FusionOpSet OpSet1;
+ // The second fusion op set.
+ FusionOpSet OpSet2;
+public:
+ FusionFeature(FusionKind Kind, bool HasFeature, int Index,
+ const FusionOpSet &First, const FusionOpSet &Second) :
+ Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
+ OpSet2(Second) {}
+
+ bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
+ bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+ bool isSupported() const { return Supported; }
+ Optional<unsigned> depOpIdx() const {
+ if (DepOpIdx < 0)
+ return None;
+ return DepOpIdx;
+ }
+
+ FusionKind getKind() const { return Kd; }
+};
+
+static bool matchingRegOps(const MachineInstr &FirstMI,
+ int FirstMIOpIndex,
+ const MachineInstr &SecondMI,
+ int SecondMIOpIndex) {
+ const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
+ const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
+ if (!Op1.isReg() || !Op2.isReg())
+ return false;
+
+ return Op1.getReg() == Op2.getReg();
+}
+
+// Return true if the FirstMI meets the constraints of SecondMI according to
+// fusion specification.
+static bool checkOpConstraints(FusionFeature::FusionKind Kd,
+ const MachineInstr &FirstMI,
+ const MachineInstr &SecondMI) {
+ switch (Kd) {
+  // The hardware doesn't require any specific check on the operands of the
+  // fused instructions, so return true to indicate that the pair is fusable.
+ default: return true;
+ // [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
+ case FusionFeature::FK_AddiLoad: {
+ // lxvd2x(ra) cannot be zero
+ const MachineOperand &RA = SecondMI.getOperand(1);
+ if (!RA.isReg())
+ return true;
+
+ return Register::isVirtualRegister(RA.getReg()) ||
+ (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
+ }
+ // [addis rt,ra,si - ld rt,ds(ra)] etc.
+ case FusionFeature::FK_AddisLoad: {
+ const MachineOperand &RT = SecondMI.getOperand(0);
+ if (!RT.isReg())
+ return true;
+
+ // Only check it for non-virtual register.
+ if (!Register::isVirtualRegister(RT.getReg()))
+ // addis(rt) = ld(ra) = ld(rt)
+ // ld(rt) cannot be zero
+ if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
+ (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
+ return false;
+
+ // addis(si) first 12 bits must be all 1s or all 0s
+ const MachineOperand &SI = FirstMI.getOperand(2);
+ if (!SI.isImm())
+ return true;
+ int64_t Imm = SI.getImm();
+ if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
+ return false;
+
+ // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+ // 1, then fusion does not occur.
+ if ((Imm & 0xFFF0) == 0xFFF0) {
+ const MachineOperand &D = SecondMI.getOperand(1);
+ if (!D.isImm())
+ return true;
+
+      // The DS field is 14 bits wide, while the D field is 16 bits wide.
+ int MSB = 15;
+ if (SecondMI.getOpcode() == PPC::LD)
+ MSB = 13;
+
+ return (D.getImm() & (1ULL << MSB)) == 0;
+ }
+ return true;
+ }
+ }
+
+ llvm_unreachable("All the cases should have been handled");
+ return true;
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused together.
+/// When FirstMI is unspecified, check whether SecondMI may be part of a
+/// fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in
+ // the def file.
+ using namespace PPC;
+
+ const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
+ static const FusionFeature FusionFeatures[] = {
+ #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
+ FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
+ { OPSET2 } },
+ #include "PPCMacroFusion.def"
+ };
+ #undef FUSION_KIND
+
+ for (auto &Feature : FusionFeatures) {
+ // Skip if the feature is not supported.
+ if (!Feature.isSupported())
+ continue;
+
+    // Only start looking for a fusable FirstMI once SecondMI is known to be
+    // fusable.
+ if (Feature.hasOp2(SecondMI.getOpcode())) {
+ // If FirstMI == nullptr, that means, we're only checking whether SecondMI
+ // can be fused at all.
+ if (!FirstMI)
+ return true;
+
+ // Checking if the FirstMI is fusable with the SecondMI.
+ if (!Feature.hasOp1(FirstMI->getOpcode()))
+ continue;
+
+ auto DepOpIdx = Feature.depOpIdx();
+ if (DepOpIdx.hasValue()) {
+ // Checking if the result of the FirstMI is the desired operand of the
+ // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
+ if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
+ return false;
+ }
+
+ // Checking more on the instruction operands.
+ if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
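
Per the note in PPCMacroFusion.h, the mutation only takes effect once it is
registered with the scheduler. A sketch of the expected call site, assuming
the usual createMachineScheduler shape with a MachineSchedContext *C:

ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;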
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
new file mode 100644
index 0000000000000..c7e4e7c22e0a6
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -0,0 +1,45 @@
+//===---- PPCMacroFusion.def - PowerPC Macro Fusion Candidates -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the macro-fusion pair for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef FUSION_FEATURE
+
+// Each FUSION_FEATURE is assigned one TYPE and can be enabled/disabled via
+// HAS_FEATURE. An instruction pair is fusable only when the opcode of the
+// first instruction is in OPSET1 and the opcode of the second instruction is
+// in OPSET2. If DEP_OP_IDX >= 0, we additionally check that the result of the
+// first op is the operand of the second op at operand index DEP_OP_IDX; the
+// result of the first op is assumed to be its operand zero.
+#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
+
+#endif
+
+#ifndef FUSION_OP_SET
+#define FUSION_OP_SET(...) __VA_ARGS__
+#endif
+
+// Power8 User Manual Section 10.1.12, Instruction Fusion
+// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
+// lvewx, lvx, lxsdx}
+FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
+ FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
+ LVX, LXSDX))
+
+// {addis} followed by one of these {ld, lbz, lhz, lwz}
+FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
+ FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+
+#undef FUSION_FEATURE
+#undef FUSION_OP_SET
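
As a usage sketch, a hypothetical new pair would be declared like this (the
AddiStore kind and hasAddiStoreFusion predicate below are invented purely
to illustrate the expansion contract):

// FUSION_FEATURE(AddiStore, hasAddiStoreFusion, /*DEP_OP_IDX=*/-1,
//                FUSION_OP_SET(ADDI, ADDI8),
//                FUSION_OP_SET(STW, STD))

Each expansion site defines FUSION_FEATURE to harvest either the enum kind
(as in FusionFeature::FusionKind) or the full brace initializer (as in
shouldScheduleAdjacent) before including this file.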
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.h b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
new file mode 100644
index 0000000000000..91cbedf4558fc
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
@@ -0,0 +1,22 @@
+//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC definition of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createPowerPCMacroFusionDAGMutation());
+/// to PPCPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();
+} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index a4b4bf2973d1f..4ea714ff15f7a 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -109,6 +109,16 @@ namespace {
// Track the operand that kill Reg. We would unset the kill flag of
// the operand if there is a following redundant load immediate.
int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
+
+ // We can't just clear implicit kills, so if we encounter one, stop
+ // looking further.
+ if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
+ LLVM_DEBUG(dbgs()
+ << "Encountered an implicit kill, cannot proceed: ");
+ LLVM_DEBUG(AfterBBI->dump());
+ break;
+ }
+
if (KillIdx != -1) {
assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 01b97ba6ab202..35f5e1fbebcdf 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -142,6 +142,8 @@ const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) {
+ if (!TM.isPPC64() && Subtarget.isAIXABI())
+ report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
if (Subtarget.hasAltivec())
@@ -149,21 +151,20 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_SaveList;
}
- if (Subtarget.isDarwinABI())
- return TM.isPPC64()
- ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
- : CSR_Darwin64_SaveList)
- : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
- : CSR_Darwin32_SaveList);
-
- if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
- return CSR_SRV464_TLS_PE_SaveList;
-
// On PPC64, we might need to save r2 (but only if it is not reserved).
- bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+ // We do not need to treat R2 as callee-saved when using PC-Relative calls
+ // because any direct uses of R2 will cause it to be reserved. If the function
+ // is a leaf or the only uses of R2 are implicit uses for calls, the calls
+ // will use the @notoc relocation which will cause this function to set the
+ // st_other bit to 1, thereby communicating to its caller that it arbitrarily
+ // clobbers the TOC.
+ bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) &&
+ !Subtarget.isUsingPCRelativeCalls();
// Cold calling convention CSRs.
if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
@@ -181,12 +182,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// Standard calling convention CSRs.
if (TM.isPPC64()) {
if (Subtarget.hasAltivec())
- return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
- : CSR_SVR464_Altivec_SaveList;
- return SaveR2 ? CSR_SVR464_R2_SaveList
- : CSR_SVR464_SaveList;
+ return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
+ : CSR_PPC64_Altivec_SaveList;
+ return SaveR2 ? CSR_PPC64_R2_SaveList : CSR_PPC64_SaveList;
}
// 32-bit targets.
+ if (Subtarget.isAIXABI())
+ return CSR_AIX32_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -194,31 +196,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SVR432_SaveList;
}
-const MCPhysReg *
-PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- assert(MF && "Invalid MachineFunction pointer.");
- const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.isDarwinABI())
- return nullptr;
- if (!TM.isPPC64())
- return nullptr;
- if (MF->getFunction().getCallingConv() != CallingConv::CXX_FAST_TLS)
- return nullptr;
- if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
- return nullptr;
-
- // On PPC64, we might need to save r2 (but only if it is not reserved).
- bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
- if (Subtarget.hasAltivec())
- return SaveR2
- ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
- : CSR_SVR464_Altivec_ViaCopy_SaveList;
- else
- return SaveR2
- ? CSR_SVR464_R2_ViaCopy_SaveList
- : CSR_SVR464_ViaCopy_SaveList;
-}
-
const uint32_t *
PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -231,14 +208,9 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_AllRegs_RegMask;
}
- if (Subtarget.isDarwinABI())
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
- : CSR_Darwin64_RegMask)
- : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
- : CSR_Darwin32_RegMask);
if (Subtarget.isAIXABI()) {
assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
- return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+ return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
}
if (CC == CallingConv::Cold) {
@@ -250,12 +222,12 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_SVR32_ColdCC_RegMask));
}
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
- : CSR_SVR464_RegMask)
- : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
- : (Subtarget.hasSPE()
- ? CSR_SVR432_SPE_RegMask
- : CSR_SVR432_RegMask));
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask)
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR432_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
@@ -295,8 +267,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, PPC::LR8);
markSuperRegs(Reserved, PPC::RM);
- if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
- markSuperRegs(Reserved, PPC::VRSAVE);
+ markSuperRegs(Reserved, PPC::VRSAVE);
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
@@ -369,7 +340,8 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
int FrIdx = Info[i].getFrameIdx();
unsigned Reg = Info[i].getReg();
- unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(RC);
if (!MFI.isFixedObjectIndex(FrIdx)) {
// This is not a fixed object. If it requires alignment then we may still
// need to use the XForm.
@@ -389,7 +361,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
return false;
}
-bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
+bool PPCRegisterInfo::isCallerPreservedPhysReg(MCRegister PhysReg,
const MachineFunction &MF) const {
assert(Register::isPhysicalRegister(PhysReg));
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
@@ -508,15 +480,70 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the maximum call stack size.
unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
+ Align MaxAlign = MFI.getMaxAlign();
+ assert(isAligned(MaxAlign, maxCallFrameSize) &&
+ "Maximum call-frame size not sufficiently aligned");
+ (void)MaxAlign;
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ bool KillNegSizeReg = MI.getOperand(1).isKill();
+ Register NegSizeReg = MI.getOperand(1).getReg();
+
+ prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, Reg);
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// To accomplish dynamic stack allocation, we have to calculate the exact
+/// size to subtract from the stack pointer, taking the alignment information
+/// into account, and obtain the previous frame pointer.
+void PPCRegisterInfo::prepareDynamicAlloca(MachineBasicBlock::iterator II,
+ Register &NegSizeReg,
+ bool &KillNegSizeReg,
+ Register &FramePointer) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = TM.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
// Get the total frame size.
unsigned FrameSize = MFI.getStackSize();
// Get stack alignments.
const PPCFrameLowering *TFI = getFrameLowering(MF);
- unsigned TargetAlign = TFI->getStackAlignment();
- unsigned MaxAlign = MFI.getMaxAlignment();
- assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
- "Maximum call-frame size not sufficiently aligned");
+ Align TargetAlign = TFI->getStackAlign();
+ Align MaxAlign = MFI.getMaxAlign();
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
@@ -526,32 +553,26 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
if (LP64)
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), Reg)
- .addReg(PPC::X31)
- .addImm(FrameSize);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), FramePointer)
+ .addReg(PPC::X31)
+ .addImm(FrameSize);
else
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
- .addReg(PPC::R31)
- .addImm(FrameSize);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), FramePointer)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
} else if (LP64) {
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), FramePointer)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
- BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
- .addImm(0)
- .addReg(PPC::R1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), FramePointer)
+ .addImm(0)
+ .addReg(PPC::R1);
}
-
- bool KillNegSizeReg = MI.getOperand(1).isKill();
- Register NegSizeReg = MI.getOperand(1).getReg();
-
- // Grow the stack and update the stack pointer link, then determine the
- // address of new allocated space.
+ // Determine the actual NegSizeReg according to alignment info.
if (LP64) {
if (MaxAlign > TargetAlign) {
unsigned UnalNegSizeReg = NegSizeReg;
@@ -560,23 +581,15 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Unfortunately, there is no andi, only andi., and we can't insert that
// here because we might clobber cr0 while it is live.
BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg)
- .addImm(~(MaxAlign-1));
+ .addImm(~(MaxAlign.value() - 1));
unsigned NegSizeReg1 = NegSizeReg;
NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg)
- .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
- .addReg(NegSizeReg1, RegState::Kill);
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
KillNegSizeReg = true;
}
-
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
- .addReg(PPC::X1)
- .addImm(maxCallFrameSize);
} else {
if (MaxAlign > TargetAlign) {
unsigned UnalNegSizeReg = NegSizeReg;
@@ -585,26 +598,47 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Unfortunately, there is no andi, only andi., and we can't insert that
// here because we might clobber cr0 while it is live.
BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg)
- .addImm(~(MaxAlign-1));
+ .addImm(~(MaxAlign.value() - 1));
unsigned NegSizeReg1 = NegSizeReg;
NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg)
- .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
- .addReg(NegSizeReg1, RegState::Kill);
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
KillNegSizeReg = true;
}
+ }
+}
- BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
- .addReg(PPC::R1)
- .addImm(maxCallFrameSize);
+void PPCRegisterInfo::lowerPrepareProbedAlloca(
+ MachineBasicBlock::iterator II) const {
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = TM.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+ Register FramePointer = MI.getOperand(0).getReg();
+ Register FinalStackPtr = MI.getOperand(1).getReg();
+ bool KillNegSizeReg = MI.getOperand(2).isKill();
+ Register NegSizeReg = MI.getOperand(2).getReg();
+ prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
+ if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
}
- // Discard the DYNALLOC instruction.
MBB.erase(II);
}
@@ -665,7 +699,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0) {
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
// rlwinm rA, rA, ShiftBits, 0, 31.
@@ -710,7 +744,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned ShiftBits = getEncodingValue(DestReg)*4;
@@ -814,7 +848,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0LT, shift the bits left so that the bit
// to store is the first one. Mask all but that bit.
- unsigned Reg1 = Reg;
+ Register Reg1 = Reg;
Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
// rlwinm rA, rA, ShiftBits, 0, 0.
@@ -940,20 +974,17 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
}
bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- unsigned Reg, int &FrameIdx) const {
- const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
- // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
- // ABI, return true to prevent allocating an additional frame slot.
- // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we have
- // previously created the stack slot if needed, so return its FrameIdx.
- if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (TM.isPPC64())
- FrameIdx = 0;
- else {
- const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- FrameIdx = FI->getCRSpillFrameIndex();
- }
+ Register Reg, int &FrameIdx) const {
+ // For the nonvolatile condition registers (CR2, CR3, CR4) return true to
+ // prevent allocating an additional frame slot.
+ // For 64-bit ELF and AIX, the CR save area is in the linkage area at SP+8,
+ // for 32-bit AIX the CR save area is in the linkage area at SP+4.
+  // We have created a FrameIndex to that spill slot to keep the
+  // CalleeSavedInfo entries valid.
+ // For 32-bit ELF, we have previously created the stack slot if needed, so
+ // return its FrameIdx.
+ if (PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ FrameIdx = MF.getInfo<PPCFunctionInfo>()->getCRSpillFrameIndex();
return true;
}
return false;
@@ -1051,6 +1082,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
+ OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
+ lowerPrepareProbedAlloca(II);
+ return;
+ }
+
// Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex);
@@ -1122,7 +1160,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
- unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ Register SRegHi = MF.getRegInfo().createVirtualRegister(RC),
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
@@ -1245,10 +1283,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
/// Insert defining instruction(s) for BaseReg to
/// be a pointer to FrameIdx at the beginning of the basic block.
-void PPCRegisterInfo::
-materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
- int64_t Offset) const {
+void PPCRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ Register BaseReg,
+ int FrameIdx,
+ int64_t Offset) const {
unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
@@ -1267,7 +1305,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
}
-void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI.getOperand(FIOperandNum).isFI()) {
@@ -1292,7 +1330,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
- unsigned BaseReg,
+ Register BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
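
A note on the alignment math in prepareDynamicAlloca above: allocation
sizes are carried as negative values, so rounding to a multiple of MaxAlign
is a single AND with ~(MaxAlign - 1), which rounds toward negative infinity
and therefore grows the allocation. A scalar model:

#include <cstdint>

int64_t alignNegSize(int64_t NegSize, uint64_t MaxAlign) {
  return NegSize & ~int64_t(MaxAlign - 1); // e.g. -36 & ~15 == -48
}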
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index a5fbb0c6ec641..61acd955e1cba 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -84,7 +84,6 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
- const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
@@ -92,7 +91,8 @@ public:
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override;
+ bool isCallerPreservedPhysReg(MCRegister PhysReg,
+ const MachineFunction &MF) const override;
/// We require the register scavenger.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
@@ -101,16 +101,16 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
- return true;
- }
-
bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
return true;
}
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
+ void prepareDynamicAlloca(MachineBasicBlock::iterator II,
+ Register &NegSizeReg, bool &KillNegSizeReg,
+ Register &FramePointer) const;
+ void lowerPrepareProbedAlloca(MachineBasicBlock::iterator II) const;
void lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
@@ -124,7 +124,7 @@ public:
void lowerVRSAVERestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
- bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
int &FrameIdx) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
@@ -132,12 +132,12 @@ public:
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg, int FrameIdx,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, Register BaseReg,
+ int FrameIdx,
int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
+ bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
// Debug information queries.
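
The new prepareDynamicAlloca/lowerPrepareProbedAlloca hooks declared above exist so that large dynamic stack allocations can be probed page by page instead of moving the stack pointer in one jump past the guard page. A minimal standalone sketch of the idea, assuming a 4 KiB page (the function name and page size are illustrative, not LLVM API):

#include <cstddef>

// Touch one byte per page, low to high, before the block is used, so the
// OS guard page is always hit in order and can never be jumped over.
void probePages(volatile char *Buf, size_t Size) {
  const size_t PageSize = 4096; // assumed; the real value is target-specific
  for (size_t Off = 0; Off < Size; Off += PageSize)
    Buf[Off] = 0;
  if (Size)
    Buf[Size - 1] = 0; // also touch the final, possibly partial, page
}
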
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 4719e947b1725..b45757c1acc5e 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -156,7 +156,7 @@ foreach Index = 32-63 in {
def VSX#Index : VSXReg<Index, "vs"#Index>;
}
-// The reprsentation of r0 when treated as the constant 0.
+// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">, DwarfRegAlias<R0>;
def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
@@ -363,11 +363,23 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
CR1LT, CR1GT, CR1EQ, CR1UN,
CR0LT, CR0GT, CR0EQ, CR0UN)> {
let Size = 32;
+ let AltOrders = [(sub CRBITRC, CR2LT, CR2GT, CR2EQ, CR2UN, CR3LT, CR3GT,
+ CR3EQ, CR3UN, CR4LT, CR4GT, CR4EQ, CR4UN)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ MF.getInfo<PPCFunctionInfo>()->isNonVolatileCRDisabled();
+ }];
}
-def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
- CR7, CR2, CR3, CR4)>;
-
+def CRRC : RegisterClass<"PPC", [i32], 32,
+ (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
+ let AltOrders = [(sub CRRC, CR2, CR3, CR4)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ MF.getInfo<PPCFunctionInfo>()->isNonVolatileCRDisabled();
+ }];
+}
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
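
The AltOrders/AltOrderSelect additions above let the register allocator skip the non-volatile fields CR2-CR4 entirely when a function opts out of them under ELFv2. A rough standalone sketch of the selection logic that TableGen emits, using hypothetical stand-in orders:

#include <vector>

// Hypothetical stand-ins for the TableGen-generated allocation orders.
static const std::vector<int> DefaultOrder = {0, 1, 5, 6, 7, 2, 3, 4};
static const std::vector<int> VolatileOnly = {0, 1, 5, 6, 7}; // CR2-CR4 dropped

// Mirrors the emitted AltOrderSelect body: use the reduced order only when
// the ELFv2 ABI is in effect and non-volatile CR use has been disabled.
const std::vector<int> &crAllocationOrder(bool IsELFv2,
                                          bool NonVolatileCRDisabled) {
  return (IsELFv2 && NonVolatileCRDisabled) ? VolatileOnly : DefaultOrder;
}
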
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 6a79cca89194c..0a1ae7e55b3c2 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -20,7 +20,7 @@ def P9Model : SchedMachineModel {
// Load latency is 4 or 5 cycles depending on the load. This latency assumes
// that we have a cache hit. For a cache miss the load latency will be more.
- // There are two instructions (lxvl, lxvll) that have a latencty of 6 cycles.
+ // There are two instructions (lxvl, lxvll) that have a latency of 6 cycles.
// However it is not worth bumping this value up to 6 when the vast majority
// of instructions are 4 or 5 cycles.
let LoadLatency = 5;
@@ -40,9 +40,11 @@ def P9Model : SchedMachineModel {
let CompleteModel = 1;
- // Do not support QPX (Quad Processing eXtension) or SPE (Signal Procesing
- // Engine) on Power 9.
- let UnsupportedFeatures = [HasQPX, HasSPE];
+ // The P9 model does not support QPX (Quad Processing eXtension), SPE
+ // (Signal Processing Engine), prefixed instructions, PC-relative memory
+ // ops, or instructions introduced in ISA 3.1.
+ let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops,
+ IsISA3_1];
}
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 0997f68bd999b..3836cc960394f 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -78,6 +78,9 @@ void PPCSubtarget::initializeEnvironment() {
HasP8Crypto = false;
HasP9Vector = false;
HasP9Altivec = false;
+ HasP10Vector = false;
+ HasPrefixInstrs = false;
+ HasPCRelativeMemops = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -102,7 +105,6 @@ void PPCSubtarget::initializeEnvironment() {
FeatureMFTB = false;
AllowsUnalignedFPAccess = false;
DeprecatedDST = false;
- HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
@@ -110,19 +112,24 @@ void PPCSubtarget::initializeEnvironment() {
IsQPXStackUnaligned = false;
HasHTM = false;
HasFloat128 = false;
+ HasFusion = false;
+ HasAddiLoadFusion = false;
+ HasAddisLoadFusion = false;
IsISA3_0 = false;
+ IsISA3_1 = false;
UseLongCalls = false;
SecurePlt = false;
VectorsUseTwoUnits = false;
UsePPCPreRASchedStrategy = false;
UsePPCPostRASchedStrategy = false;
+ PredictableSelectIsExpensive = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
- std::string CPUName = CPU;
+ std::string CPUName = std::string(CPU);
if (CPUName.empty() || CPU == "generic") {
// If cross-compiling with -march=ppc64le without -mcpu
if (TargetTriple.getArch() == Triple::ppc64le)
@@ -144,10 +151,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (IsPPC64 && has64BitSupport())
Use64BitRegs = true;
- // Set up darwin-specific properties.
- if (isDarwin())
- HasLazyResolverStubs = true;
-
if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
TargetTriple.isMusl())
@@ -174,26 +177,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
}
-/// Return true if accesses to the specified global have to go through a dyld
-/// lazy resolution stub. This means that an extra load is required to get the
-/// address of the global.
-bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
- if (!HasLazyResolverStubs)
- return false;
- if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
- return true;
- // 32 bit macho has no relocation for a-b if a is undefined, even if b is in
- // the section that is being relocated. This means we have to use o load even
- // for GVs that are known to be local to the dso.
- if (GV->isDeclarationForLinker() || GV->hasCommonLinkage())
- return true;
- return false;
-}
-
bool PPCSubtarget::enableMachineScheduler() const { return true; }
bool PPCSubtarget::enableMachinePipeliner() const {
- return (CPUDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
+ return getSchedModel().hasInstrSchedModel() && EnableMachinePipeliner;
}
bool PPCSubtarget::useDFAforSMS() const { return false; }
@@ -243,3 +230,8 @@ bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
+
+bool PPCSubtarget::isUsingPCRelativeCalls() const {
+ return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() &&
+ CodeModel::Medium == getTargetMachine().getCodeModel();
+}
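
isUsingPCRelativeCalls above requires every prerequisite at once; a standalone sketch of the same conjunction over plain flags (the names here are illustrative, not the subtarget API):

enum class CodeModel { Small, Medium, Large };

// All four conditions must hold: 64-bit target, ISA 3.1 PC-relative memops,
// the ELFv2 ABI, and the medium code model. Any one alone is not enough.
bool usingPCRelativeCalls(bool IsPPC64, bool HasPCRelMemops, bool IsELFv2,
                          CodeModel CM) {
  return IsPPC64 && HasPCRelMemops && IsELFv2 && CM == CodeModel::Medium;
}
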
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 044e982740e91..ec329022c4572 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -34,36 +34,36 @@ class StringRef;
namespace PPC {
// -m directive values.
- enum {
- DIR_NONE,
- DIR_32,
- DIR_440,
- DIR_601,
- DIR_602,
- DIR_603,
- DIR_7400,
- DIR_750,
- DIR_970,
- DIR_A2,
- DIR_E500,
- DIR_E500mc,
- DIR_E5500,
- DIR_PWR3,
- DIR_PWR4,
- DIR_PWR5,
- DIR_PWR5X,
- DIR_PWR6,
- DIR_PWR6X,
- DIR_PWR7,
- DIR_PWR8,
- DIR_PWR9,
- DIR_PWR_FUTURE,
- DIR_64
- };
+enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_A2,
+ DIR_E500,
+ DIR_E500mc,
+ DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
+ DIR_PWR6,
+ DIR_PWR6X,
+ DIR_PWR7,
+ DIR_PWR8,
+ DIR_PWR9,
+ DIR_PWR10,
+ DIR_PWR_FUTURE,
+ DIR_64
+};
}
class GlobalValue;
-class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
public:
@@ -105,6 +105,9 @@ protected:
bool HasP8Crypto;
bool HasP9Vector;
bool HasP9Altivec;
+ bool HasP10Vector;
+ bool HasPrefixInstrs;
+ bool HasPCRelativeMemops;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -126,7 +129,6 @@ protected:
bool FeatureMFTB;
bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
- bool HasLazyResolverStubs;
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
@@ -134,12 +136,17 @@ protected:
bool HasDirectMove;
bool HasHTM;
bool HasFloat128;
+ bool HasFusion;
+ bool HasAddiLoadFusion;
+ bool HasAddisLoadFusion;
bool IsISA3_0;
+ bool IsISA3_1;
bool UseLongCalls;
bool SecurePlt;
bool VectorsUseTwoUnits;
bool UsePPCPreRASchedStrategy;
bool UsePPCPostRASchedStrategy;
+ bool PredictableSelectIsExpensive;
POPCNTDKind HasPOPCNTD;
@@ -230,11 +237,6 @@ public:
/// the individual condition register bits.
bool useCRBits() const { return UseCRBits; }
- /// hasLazyResolverStub - Return true if accesses to the specified global have
- /// to go through a dyld lazy resolution stub. This means that an extra load
- /// is required to get the address of the global.
- bool hasLazyResolverStub(const GlobalValue *GV) const;
-
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -261,6 +263,9 @@ public:
bool hasP8Crypto() const { return HasP8Crypto; }
bool hasP9Vector() const { return HasP9Vector; }
bool hasP9Altivec() const { return HasP9Altivec; }
+ bool hasP10Vector() const { return HasP10Vector; }
+ bool hasPrefixInstrs() const { return HasPrefixInstrs; }
+ bool hasPCRelativeMemops() const { return HasPCRelativeMemops; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasBPERMD() const { return HasBPERMD; }
@@ -294,16 +299,24 @@ public:
return Align(16);
}
- // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no
- // red zone and PPC64 SVR4ABI has a 288-byte red zone.
unsigned getRedZoneSize() const {
- return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
+ if (isPPC64())
+ // 288 bytes = 18*8 (FPRs) + 18*8 (GPRs, GPR13 reserved)
+ return 288;
+
+ // AIX PPC32: 220 bytes = 18*8 (FPRs) + 19*4 (GPRs);
+ // PPC32 SVR4ABI has no redzone.
+ return isAIXABI() ? 220 : 0;
}
bool hasHTM() const { return HasHTM; }
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
+ bool isISA3_1() const { return IsISA3_1; }
bool useLongCalls() const { return UseLongCalls; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
+ bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
@@ -312,8 +325,6 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- /// isDarwin - True if this is any darwin platform.
- bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
@@ -321,13 +332,13 @@ public:
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
- bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
bool isAIXABI() const { return TargetTriple.isOSAIX(); }
- bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
+ bool isSVR4ABI() const { return !isAIXABI(); }
bool isELFv2ABI() const;
bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); }
bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
+ bool isUsingPCRelativeCalls() const;
/// Originally, this function returned hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
@@ -389,6 +400,10 @@ public:
}
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
+
+ bool isPredictableSelectIsExpensive() const {
+ return PredictableSelectIsExpensive;
+ }
};
} // End llvm namespace
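
The byte counts in the rewritten getRedZoneSize comment can be checked directly; a small self-contained sketch of the same arithmetic:

#include <cassert>

// Mirrors the comment: PPC64 gets 18 FPRs + 18 GPRs (GPR13 reserved) at 8
// bytes each; AIX PPC32 gets 18 FPRs at 8 bytes + 19 GPRs at 4 bytes.
unsigned redZoneSize(bool IsPPC64, bool IsAIX) {
  if (IsPPC64)
    return 18 * 8 + 18 * 8; // 288 bytes
  return IsAIX ? 18 * 8 + 19 * 4 : 0; // AIX PPC32: 220; SVR4 PPC32: none
}

int main() {
  assert(redZoneSize(true, false) == 288);
  assert(redZoneSize(true, true) == 288); // PPC64 wins regardless of OS
  assert(redZoneSize(false, true) == 220);
  assert(redZoneSize(false, false) == 0);
}
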
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 17e1196eea590..4b809e0c8553b 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -78,9 +78,9 @@ protected:
Register OutReg = MI.getOperand(0).getReg();
Register InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
- unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+ Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
- const unsigned OrigRegs[] = {OutReg, InReg, GPR3};
+ const Register OrigRegs[] = {OutReg, InReg, GPR3};
switch (MI.getOpcode()) {
default:
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2caf4c99a1f88..f15f9c7f49429 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
#include "PPCMachineScheduler.h"
+#include "PPCMacroFusion.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
@@ -158,7 +159,7 @@ static std::string getDataLayoutString(const Triple &T) {
static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
const Triple &TT) {
- std::string FullFS = FS;
+ std::string FullFS = std::string(FS);
// Make sure 64-bit features are available when CPUname is generic
if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
@@ -223,6 +224,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
+ assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) &&
+ "Invalid relocation model for AIX.");
+
if (RM.hasValue())
return *RM;
@@ -230,8 +234,8 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
- // Big Endian PPC is PIC by default.
- if (TT.getArch() == Triple::ppc64)
+ // Big Endian PPC and AIX default to PIC.
+ if (TT.getArch() == Triple::ppc64 || TT.isOSAIX())
return Reloc::PIC_;
// Rest are static by default.
@@ -272,6 +276,9 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
std::make_unique<GenericScheduler>(C));
// add DAG Mutations here.
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
+
return DAG;
}
@@ -283,6 +290,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
std::make_unique<PPCPostRASchedStrategy>(C) :
std::make_unique<PostGenericScheduler>(C), true);
// add DAG Mutations here.
+ if (ST.hasFusion())
+ DAG->addMutation(createPowerPCMacroFusionDAGMutation());
return DAG;
}
@@ -495,7 +504,7 @@ void PPCPassConfig::addPreRegAlloc() {
// PPCTLSDynamicCallPass uses LiveIntervals, which previously depended on
// LiveVariables. This (unnecessary) dependency has been removed now,
// however a stage-2 clang build fails without LiveVariables computed here.
- addPass(&LiveVariablesID, false);
+ addPass(&LiveVariablesID);
addPass(createPPCTLSDynamicCallPass());
}
if (EnableExtraTOCRegDeps)
@@ -522,9 +531,9 @@ void PPCPassConfig::addPreEmitPass() {
addPass(createPPCExpandISELPass());
if (getOptLevel() != CodeGenOpt::None)
- addPass(createPPCEarlyReturnPass(), false);
+ addPass(createPPCEarlyReturnPass());
// Must run branch selection immediately preceding the asm printer.
- addPass(createPPCBranchSelectionPass(), false);
+ addPass(createPPCBranchSelectionPass());
}
TargetTransformInfo
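
The createPowerPCMacroFusionDAGMutation calls added above ask both the pre-RA and post-RA schedulers to keep fusible pairs adjacent. A loose standalone sketch of the kind of pairing test involved, with hypothetical opcode strings (the real predicate works on MachineInstrs and feature bits such as HasAddiLoadFusion):

#include <string>

// Hypothetical instruction record; the real mutation inspects MachineInstrs.
struct Inst {
  std::string Opcode;
  int DefReg; // register defined
  int UseReg; // base register consumed
};

// Keep an addi and a dependent load adjacent so the core can fuse them into
// one dispatch group slot; this is what the DAG mutation arranges.
bool shouldFuse(const Inst &First, const Inst &Second) {
  bool IsAddi = First.Opcode == "addi" || First.Opcode == "addi8";
  bool IsLoad = !Second.Opcode.empty() && Second.Opcode[0] == 'l';
  return IsAddi && IsLoad && Second.UseReg == First.DefReg;
}
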
diff --git a/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index e237fab1b2679..d52c9f92bd1d1 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetObjectFile.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -18,7 +19,6 @@ void
PPC64LinuxTargetObjectFile::
Initialize(MCContext &Ctx, const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
}
MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e05699cc95ec6..53556ffc267df 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -33,6 +33,10 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
"internal functions"));
+static cl::opt<bool>
+LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
+ cl::desc("Do not add instruction count to lsr cost model"));
+
// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
@@ -55,9 +59,10 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCost(Imm, Ty);
+ return BaseT::getIntImmCost(Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -85,9 +90,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty);
+ return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -115,13 +121,14 @@ int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return PPCTTIImpl::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
if (DisablePPCConstHoist)
- return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty);
+ return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
assert(Ty->isIntegerTy());
@@ -199,22 +206,28 @@ int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return TTI::TCC_Free;
}
- return PPCTTIImpl::getIntImmCost(Imm, Ty);
+ return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-unsigned PPCTTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands) {
+unsigned
+PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
+ // Casts, loads and stores are costed through getCastInstrCost and
+ // getMemoryOpCost, which already apply the vector adjustment, so defer
+ // to the base implementation here to avoid adjusting twice.
+ if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
+ return BaseT::getUserCost(U, Operands, CostKind);
+
if (U->getType()->isVectorTy()) {
// Instructions that need to be split should cost more.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
- return LT.first * BaseT::getUserCost(U, Operands);
+ return LT.first * BaseT::getUserCost(U, Operands, CostKind);
}
- return BaseT::getUserCost(U, Operands);
+ return BaseT::getUserCost(U, Operands, CostKind);
}
-bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
- TargetLibraryInfo *LibInfo) {
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+ SmallPtrSetImpl<const Value *> &Visited) {
const PPCTargetMachine &TM = ST->getTargetMachine();
// Loop through the inline asm constraints and look for something that
@@ -233,8 +246,11 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
// Determining the address of a TLS variable results in a function call in
// certain TLS models.
- std::function<bool(const Value*)> memAddrUsesCTR =
- [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+ std::function<bool(const Value *)> memAddrUsesCTR =
+ [&memAddrUsesCTR, &TM, &Visited](const Value *MemAddr) -> bool {
+ // No need to traverse again if we already checked this operand.
+ if (!Visited.insert(MemAddr).second)
+ return false;
const auto *GV = dyn_cast<GlobalValue>(MemAddr);
if (!GV) {
// Recurse to check for constants that refer to TLS global variables.
@@ -264,7 +280,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
J != JE; ++J) {
if (CallInst *CI = dyn_cast<CallInst>(J)) {
// Inline ASM is okay, unless it clobbers the ctr register.
- if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
if (asmClobbersCTR(IA))
return true;
continue;
@@ -277,7 +293,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
switch (F->getIntrinsicID()) {
default: continue;
- // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
+ // If we have a call to loop_decrement or set_loop_iterations,
// we're definitely using CTR.
case Intrinsic::set_loop_iterations:
case Intrinsic::loop_decrement:
@@ -308,6 +324,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::fma: Opcode = ISD::FMA; break;
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
@@ -412,8 +429,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
return true;
} else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
+ (J->getType()->getScalarType()->isFP128Ty() ||
+ J->getType()->getScalarType()->isPPC_FP128Ty())) {
+ // Most operations on f128 or ppc_f128 values become calls.
return true;
} else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
@@ -498,9 +516,10 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
// We don't want to spill/restore the counter register, and so we don't
// want to use the counter register if the loop contains calls.
+ SmallPtrSet<const Value *, 4> Visited;
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I)
- if (mightUseCTR(*I, LibInfo))
+ if (mightUseCTR(*I, LibInfo, Visited))
return false;
SmallVector<BasicBlock*, 4> ExitingBlocks;
@@ -549,6 +568,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getUnrollingPreferences(L, SE, UP);
}
+void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
@@ -637,11 +660,12 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
if (CacheLineSize.getNumOccurrences() > 0)
return CacheLineSize;
- // On P7, P8 or P9 we have a cache line size of 128.
+ // Starting with P7 we have a cache line size of 128.
unsigned Directive = ST->getCPUDirective();
// Assume that Future CPU has the same cache line size as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
+ Directive == PPC::DIR_PWR_FUTURE)
return 128;
// On other processors return a default of 64 bytes.
@@ -673,9 +697,11 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// For P7 and P8, floating-point instructions have a 6-cycle latency and
// there are two execution units, so unroll by 12x for latency hiding.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
+ // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
// Assume that future is the same as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
- Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
+ Directive == PPC::DIR_PWR_FUTURE)
return 12;
// For most things, modern systems have two execution units (and
@@ -711,6 +737,7 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
}
int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
@@ -718,9 +745,15 @@ int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
// Fallback to the default implementation.
- int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info,
Opd1PropInfo, Opd2PropInfo);
return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
}
@@ -739,17 +772,33 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
nullptr);
}
+int PPCTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Opcode == Instruction::PHI ? 0 : 1;
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
- return vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ // TODO: Allow non-throughput costs that aren't binary.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost == 0 ? 0 : 1;
+ return Cost;
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
- int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
@@ -828,13 +877,22 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return Cost;
+
Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
bool IsAltivecType = ST->hasAltivec() &&
@@ -856,7 +914,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Aligned loads and stores are easy.
unsigned SrcBytes = LT.second.getStoreSize();
- if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
return Cost;
// If we can use the permutation-based load sequence, then this is also
@@ -868,7 +926,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// longer true.
if (Opcode == Instruction::Load &&
((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
- Alignment >= LT.second.getScalarType().getStoreSize())
+ *Alignment >= LT.second.getScalarType().getStoreSize())
return Cost + LT.first; // Add the cost of the permutations.
// For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
@@ -893,22 +951,20 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// stores, loads are expanded using the vector-load + permutation sequence,
// which is much less expensive).
if (Src->isVectorTy() && Opcode == Instruction::Store)
- for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
+ ++i)
Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
return Cost;
}
-int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int PPCTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
@@ -919,7 +975,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
// Firstly, the cost of load/store operation.
int Cost =
- getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
+ getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+ CostKind);
// PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
// (at least in the sense that there need only be one non-loop-invariant
@@ -931,18 +988,9 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- if (ID == Intrinsic::bswap && ST->hasP9Vector())
- return TLI->getTypeLegalizationCost(DL, RetTy).first;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
- ScalarizationCostPassed);
+unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
@@ -967,3 +1015,16 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
*BI = HWLoopInfo.ExitBranch;
return true;
}
+
+bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2) {
+ // By default, PowerPC gives instruction count first priority in the LSR
+ // cost model. If LsrNoInsnsCost is set, fall back to the default
+ // implementation instead.
+ if (!LsrNoInsnsCost)
+ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
+ C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
+ C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+ else
+ return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+}
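
Threading a Visited set through mightUseCTR, as done above, stops the recursive memory-address walk from re-checking constant expressions shared across many uses. A self-contained sketch of the pattern on a toy node type:

#include <unordered_set>
#include <vector>

struct Node {
  bool IsTLSGlobal = false;
  std::vector<const Node *> Operands;
};

// Each node is examined at most once: the caller-owned Visited set makes a
// walk over a DAG of shared subexpressions linear in the number of nodes
// instead of exponential in the number of uses.
bool memAddrUsesCTR(const Node *N, std::unordered_set<const Node *> &Visited) {
  if (!Visited.insert(N).second)
    return false; // already checked via another path
  if (N->IsTLSGlobal)
    return true;
  for (const Node *Op : N->Operands)
    if (memAddrUsesCTR(Op, Visited))
      return true;
  return false;
}
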
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 35388d14f606d..d998521084e1c 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -33,7 +33,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCSubtarget *getST() const { return ST; }
const PPCTargetLowering *getTLI() const { return TLI; }
- bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
+ bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
+ SmallPtrSetImpl<const Value *> &Visited);
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -44,14 +45,16 @@ public:
/// @{
using BaseT::getIntImmCost;
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
@@ -63,6 +66,10 @@ public:
TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2);
/// @}
@@ -87,6 +94,7 @@ public:
int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -95,24 +103,24 @@ public:
const Instruction *CxtI = nullptr);
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
/// @}
};
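
The isLSRCostLess declaration above pairs with the definition earlier in this patch: instruction count becomes the most significant key of a lexicographic comparison. A minimal standalone example of the std::tie idiom it relies on (three fields shown for brevity):

#include <cassert>
#include <tuple>

struct LSRCost {
  unsigned Insns, NumRegs, AddRecCost;
};

// Instruction count compares first; ties fall through to the remaining
// fields left to right, which is exactly what std::tie's operator< gives.
bool lsrCostLess(const LSRCost &A, const LSRCost &B) {
  return std::tie(A.Insns, A.NumRegs, A.AddRecCost) <
         std::tie(B.Insns, B.NumRegs, B.AddRecCost);
}

int main() {
  assert(lsrCostLess({3, 9, 1}, {4, 1, 0})); // fewer instructions wins
  assert(!lsrCostLess({4, 1, 0}, {3, 9, 1}));
}
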