author    Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
commit    cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree      209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/SystemZ
parent    706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'llvm/lib/Target/SystemZ')
-rw-r--r--  llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 386
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp | 3
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h | 11
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 4
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 7
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZ.h | 1
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 48
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZAsmPrinter.h | 6
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZCallingConv.h | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp | 7
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp | 120
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFeatures.td | 87
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 323
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 25
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 655
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.h | 35
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrBuilder.h | 3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrFP.td | 24
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 198
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 318
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 41
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 21
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrVector.td | 418
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 16
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZOperands.td | 60
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZOperators.td | 8
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZPatterns.td | 6
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZProcessors.td | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp | 21
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 9
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZRegisterInfo.td | 6
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 16
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 7
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZShortenInst.cpp | 45
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 19
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZSubtarget.h | 4
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTDC.cpp | 14
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp | 52
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetMachine.h | 13
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 421
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h | 51
44 files changed, 2415 insertions(+), 1105 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 607266d552a6..d5a3a19446c7 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -53,8 +53,6 @@ enum RegisterKind {
GRH32Reg,
GR64Reg,
GR128Reg,
- ADDR32Reg,
- ADDR64Reg,
FP32Reg,
FP64Reg,
FP128Reg,
@@ -109,7 +107,7 @@ private:
// Base + Disp + Index, where Base and Index are LLVM registers or 0.
// MemKind says what type of memory this is and RegKind says what type
- // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+ // the base register has (GR32Reg or GR64Reg). Length is the operand
// length for D(L,B)-style operands, otherwise it is null.
struct MemOp {
unsigned Base : 12;
@@ -348,8 +346,8 @@ public:
bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
- bool isADDR32() const { return isReg(ADDR32Reg); }
- bool isADDR64() const { return isReg(ADDR64Reg); }
+ bool isADDR32() const { return isReg(GR32Reg); }
+ bool isADDR64() const { return isReg(GR64Reg); }
bool isADDR128() const { return false; }
bool isFP32() const { return isReg(FP32Reg); }
bool isFP64() const { return isReg(FP64Reg); }
@@ -361,16 +359,16 @@ public:
bool isAR32() const { return isReg(AR32Reg); }
bool isCR64() const { return isReg(CR64Reg); }
bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
- bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
- bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
- bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
- bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
- bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
- bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
- bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); }
- bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
- bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); }
- bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, GR32Reg); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, GR32Reg); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, GR64Reg); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, GR64Reg); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, GR64Reg); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, GR64Reg); }
+ bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(GR64Reg); }
+ bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); }
+ bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); }
+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); }
bool isU1Imm() const { return isImm(0, 1); }
bool isU2Imm() const { return isImm(0, 3); }
bool isU3Imm() const { return isImm(0, 7); }
@@ -405,26 +403,24 @@ private:
SMLoc StartLoc, EndLoc;
};
- bool parseRegister(Register &Reg);
+ bool parseRegister(Register &Reg, bool RestoreOnFailure = false);
- bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
- bool IsAddress = false);
+ bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
OperandMatchResultTy parseRegister(OperandVector &Operands,
- RegisterGroup Group, const unsigned *Regs,
RegisterKind Kind);
OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
- bool parseAddress(bool &HaveReg1, Register &Reg1,
- bool &HaveReg2, Register &Reg2,
- const MCExpr *&Disp, const MCExpr *&Length);
+ bool parseAddress(bool &HaveReg1, Register &Reg1, bool &HaveReg2,
+ Register &Reg2, const MCExpr *&Disp, const MCExpr *&Length,
+ bool HasLength = false, bool HasVectorIndex = false);
bool parseAddressRegister(Register &Reg);
bool ParseDirectiveInsn(SMLoc L);
OperandMatchResultTy parseAddress(OperandVector &Operands,
- MemoryKind MemKind, const unsigned *Regs,
+ MemoryKind MemKind,
RegisterKind RegKind);
OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
@@ -449,6 +445,10 @@ public:
// Override MCTargetAsmParser.
bool ParseDirective(AsmToken DirectiveID) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
+ bool RestoreOnFailure);
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -458,76 +458,78 @@ public:
// Used by the TableGen code to parse particular operand types.
OperandMatchResultTy parseGR32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
+ return parseRegister(Operands, GR32Reg);
}
OperandMatchResultTy parseGRH32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ return parseRegister(Operands, GRH32Reg);
}
OperandMatchResultTy parseGRX32(OperandVector &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
OperandMatchResultTy parseGR64(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
+ return parseRegister(Operands, GR64Reg);
}
OperandMatchResultTy parseGR128(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
+ return parseRegister(Operands, GR128Reg);
}
OperandMatchResultTy parseADDR32(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
+ // For the AsmParser, we will accept %r0 for ADDR32 as well.
+ return parseRegister(Operands, GR32Reg);
}
OperandMatchResultTy parseADDR64(OperandVector &Operands) {
- return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
+ // For the AsmParser, we will accept %r0 for ADDR64 as well.
+ return parseRegister(Operands, GR64Reg);
}
OperandMatchResultTy parseADDR128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
OperandMatchResultTy parseFP32(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
+ return parseRegister(Operands, FP32Reg);
}
OperandMatchResultTy parseFP64(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
+ return parseRegister(Operands, FP64Reg);
}
OperandMatchResultTy parseFP128(OperandVector &Operands) {
- return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
+ return parseRegister(Operands, FP128Reg);
}
OperandMatchResultTy parseVR32(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+ return parseRegister(Operands, VR32Reg);
}
OperandMatchResultTy parseVR64(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+ return parseRegister(Operands, VR64Reg);
}
OperandMatchResultTy parseVF128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
OperandMatchResultTy parseVR128(OperandVector &Operands) {
- return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+ return parseRegister(Operands, VR128Reg);
}
OperandMatchResultTy parseAR32(OperandVector &Operands) {
- return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
+ return parseRegister(Operands, AR32Reg);
}
OperandMatchResultTy parseCR64(OperandVector &Operands) {
- return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg);
+ return parseRegister(Operands, CR64Reg);
}
OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
return parseAnyRegister(Operands);
}
OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
- return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
+ return parseAddress(Operands, BDMem, GR32Reg);
}
OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDMem, GR64Reg);
}
OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDXMem, GR64Reg);
}
OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDLMem, GR64Reg);
}
OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDRMem, GR64Reg);
}
OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
- return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
+ return parseAddress(Operands, BDVMem, GR64Reg);
}
OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
@@ -691,27 +693,37 @@ void SystemZOperand::print(raw_ostream &OS) const {
}
// Parse one register of the form %<prefix><number>.
-bool SystemZAsmParser::parseRegister(Register &Reg) {
+bool SystemZAsmParser::parseRegister(Register &Reg, bool RestoreOnFailure) {
Reg.StartLoc = Parser.getTok().getLoc();
// Eat the % prefix.
if (Parser.getTok().isNot(AsmToken::Percent))
return Error(Parser.getTok().getLoc(), "register expected");
+ const AsmToken &PercentTok = Parser.getTok();
Parser.Lex();
// Expect a register name.
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
// Check that there's a prefix.
StringRef Name = Parser.getTok().getString();
- if (Name.size() < 2)
+ if (Name.size() < 2) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
char Prefix = Name[0];
// Treat the rest of the register name as a register number.
- if (Name.substr(1).getAsInteger(10, Reg.Num))
+ if (Name.substr(1).getAsInteger(10, Reg.Num)) {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
// Look for valid combinations of prefix and number.
if (Prefix == 'r' && Reg.Num < 16)
@@ -724,49 +736,102 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegAR;
else if (Prefix == 'c' && Reg.Num < 16)
Reg.Group = RegCR;
- else
+ else {
+ if (RestoreOnFailure)
+ getLexer().UnLex(PercentTok);
return Error(Reg.StartLoc, "invalid register");
+ }
Reg.EndLoc = Parser.getTok().getLoc();
Parser.Lex();
return false;
}
-// Parse a register of group Group. If Regs is nonnull, use it to map
-// the raw register number to LLVM numbering, with zero entries
-// indicating an invalid register. IsAddress says whether the
-// register appears in an address context. Allow FP Group if expecting
-// RegV Group, since the f-prefix yields the FP group even while used
-// with vector instructions.
-bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
- const unsigned *Regs, bool IsAddress) {
- if (parseRegister(Reg))
- return true;
- if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
- return Error(Reg.StartLoc, "invalid operand for instruction");
- if (Regs && Regs[Reg.Num] == 0)
- return Error(Reg.StartLoc, "invalid register pair");
- if (Reg.Num == 0 && IsAddress)
- return Error(Reg.StartLoc, "%r0 used in an address");
- if (Regs)
- Reg.Num = Regs[Reg.Num];
- return false;
-}
-
-// Parse a register and add it to Operands. The other arguments are as above.
+// Parse a register of kind Kind and add it to Operands.
OperandMatchResultTy
-SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
- const unsigned *Regs, RegisterKind Kind) {
- if (Parser.getTok().isNot(AsmToken::Percent))
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterKind Kind) {
+ Register Reg;
+ RegisterGroup Group;
+ switch (Kind) {
+ case GR32Reg:
+ case GRH32Reg:
+ case GR64Reg:
+ case GR128Reg:
+ Group = RegGR;
+ break;
+ case FP32Reg:
+ case FP64Reg:
+ case FP128Reg:
+ Group = RegFP;
+ break;
+ case VR32Reg:
+ case VR64Reg:
+ case VR128Reg:
+ Group = RegV;
+ break;
+ case AR32Reg:
+ Group = RegAR;
+ break;
+ case CR64Reg:
+ Group = RegCR;
+ break;
+ }
+
+ // Handle register names of the form %<prefix><number>
+ if (Parser.getTok().is(AsmToken::Percent)) {
+ if (parseRegister(Reg))
+ return MatchOperand_ParseFail;
+
+ // Check the parsed register group "Reg.Group" with the expected "Group"
+ // Have to error out if user specified wrong prefix.
+ switch (Group) {
+ case RegGR:
+ case RegFP:
+ case RegAR:
+ case RegCR:
+ if (Group != Reg.Group) {
+ Error(Reg.StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case RegV:
+ if (Reg.Group != RegV && Reg.Group != RegFP) {
+ Error(Reg.StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ }
+ } else if (Parser.getTok().is(AsmToken::Integer)) {
+ if (parseIntegerRegister(Reg, Group))
+ return MatchOperand_ParseFail;
+ }
+ // Otherwise we didn't match a register operand.
+ else
return MatchOperand_NoMatch;
- Register Reg;
- bool IsAddress = (Kind == ADDR32Reg || Kind == ADDR64Reg);
- if (parseRegister(Reg, Group, Regs, IsAddress))
+ // Determine the LLVM register number according to Kind.
+ const unsigned *Regs;
+ switch (Kind) {
+ case GR32Reg: Regs = SystemZMC::GR32Regs; break;
+ case GRH32Reg: Regs = SystemZMC::GRH32Regs; break;
+ case GR64Reg: Regs = SystemZMC::GR64Regs; break;
+ case GR128Reg: Regs = SystemZMC::GR128Regs; break;
+ case FP32Reg: Regs = SystemZMC::FP32Regs; break;
+ case FP64Reg: Regs = SystemZMC::FP64Regs; break;
+ case FP128Reg: Regs = SystemZMC::FP128Regs; break;
+ case VR32Reg: Regs = SystemZMC::VR32Regs; break;
+ case VR64Reg: Regs = SystemZMC::VR64Regs; break;
+ case VR128Reg: Regs = SystemZMC::VR128Regs; break;
+ case AR32Reg: Regs = SystemZMC::AR32Regs; break;
+ case CR64Reg: Regs = SystemZMC::CR64Regs; break;
+ }
+ if (Regs[Reg.Num] == 0) {
+ Error(Reg.StartLoc, "invalid register pair");
return MatchOperand_ParseFail;
+ }
- Operands.push_back(SystemZOperand::createReg(Kind, Reg.Num,
- Reg.StartLoc, Reg.EndLoc));
+ Operands.push_back(
+ SystemZOperand::createReg(Kind, Regs[Reg.Num], Reg.StartLoc, Reg.EndLoc));
return MatchOperand_Success;
}
@@ -831,11 +896,39 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
return MatchOperand_Success;
}
+bool SystemZAsmParser::parseIntegerRegister(Register &Reg,
+ RegisterGroup Group) {
+ Reg.StartLoc = Parser.getTok().getLoc();
+ // We have an integer token
+ const MCExpr *Register;
+ if (Parser.parseExpression(Register))
+ return true;
+
+ const auto *CE = dyn_cast<MCConstantExpr>(Register);
+ if (!CE)
+ return true;
+
+ int64_t MaxRegNum = (Group == RegV) ? 31 : 15;
+ int64_t Value = CE->getValue();
+ if (Value < 0 || Value > MaxRegNum) {
+ Error(Parser.getTok().getLoc(), "invalid register");
+ return true;
+ }
+
+ // Assign the Register Number
+ Reg.Num = (unsigned)Value;
+ Reg.Group = Group;
+ Reg.EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ // At this point, successfully parsed an integer register.
+ return false;
+}
+
// Parse a memory operand into Reg1, Reg2, Disp, and Length.
bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
bool &HaveReg2, Register &Reg2,
- const MCExpr *&Disp,
- const MCExpr *&Length) {
+ const MCExpr *&Disp, const MCExpr *&Length,
+ bool HasLength, bool HasVectorIndex) {
// Parse the displacement, which must always be present.
if (getParser().parseExpression(Disp))
return true;
@@ -844,6 +937,27 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
HaveReg1 = false;
HaveReg2 = false;
Length = nullptr;
+
+ // If we have a scenario as below:
+ // vgef %v0, 0(0), 0
+ // This is an example of a "BDVMem" instruction type.
+ //
+ // So when we parse this as an integer register, the register group
+ // needs to be tied to "RegV". Usually when the prefix is passed in
+ // as %<prefix><reg-number> its easy to check which group it should belong to
+ // However, if we're passing in just the integer there's no real way to
+ // "check" what register group it should belong to.
+ //
+ // When the user passes in the register as an integer, the user assumes that
+ // the compiler is responsible for substituting it as the right kind of
+ // register. Whereas, when the user specifies a "prefix", the onus is on
+ // the user to make sure they pass in the right kind of register.
+ //
+ // The restriction only applies to the first Register (i.e. Reg1). Reg2 is
+ // always a general register. Reg1 should be of group RegV if "HasVectorIndex"
+ // (i.e. insn is of type BDVMem) is true.
+ RegisterGroup RegGroup = HasVectorIndex ? RegV : RegGR;
+
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
@@ -852,18 +966,47 @@ bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
HaveReg1 = true;
if (parseRegister(Reg1))
return true;
+ }
+ // So if we have an integer as the first token in ([tok1], ..), it could:
+ // 1. Refer to a "Register" (i.e X,R,V fields in BD[X|R|V]Mem type of
+ // instructions)
+ // 2. Refer to a "Length" field (i.e L field in BDLMem type of instructions)
+ else if (getLexer().is(AsmToken::Integer)) {
+ if (HasLength) {
+ // Instruction has a "Length" field, safe to parse the first token as
+ // the "Length" field
+ if (getParser().parseExpression(Length))
+ return true;
+ } else {
+ // Otherwise, if the instruction has no "Length" field, parse the
+ // token as a "Register". We don't have to worry about whether the
+ // instruction is invalid here, because the caller will take care of
+ // error reporting.
+ HaveReg1 = true;
+ if (parseIntegerRegister(Reg1, RegGroup))
+ return true;
+ }
} else {
- // Parse the length.
- if (getParser().parseExpression(Length))
- return true;
+ // If its not an integer or a percent token, then if the instruction
+ // is reported to have a "Length" then, parse it as "Length".
+ if (HasLength) {
+ if (getParser().parseExpression(Length))
+ return true;
+ }
}
// Check whether there's a second register.
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
HaveReg2 = true;
- if (parseRegister(Reg2))
- return true;
+
+ if (getLexer().is(AsmToken::Integer)) {
+ if (parseIntegerRegister(Reg2, RegGR))
+ return true;
+ } else {
+ if (parseRegister(Reg2))
+ return true;
+ }
}
// Consume the closing bracket.
@@ -883,9 +1026,6 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
} else if (Reg.Group != RegGR) {
Error(Reg.StartLoc, "invalid address register");
return true;
- } else if (Reg.Num == 0) {
- Error(Reg.StartLoc, "%r0 used in an address");
- return true;
}
return false;
}
@@ -894,16 +1034,27 @@ SystemZAsmParser::parseAddressRegister(Register &Reg) {
// are as above.
OperandMatchResultTy
SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
- const unsigned *Regs, RegisterKind RegKind) {
+ RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base = 0, Index = 0, LengthReg = 0;
Register Reg1, Reg2;
bool HaveReg1, HaveReg2;
const MCExpr *Disp;
const MCExpr *Length;
- if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length))
+
+ bool HasLength = (MemKind == BDLMem) ? true : false;
+ bool HasVectorIndex = (MemKind == BDVMem) ? true : false;
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length, HasLength,
+ HasVectorIndex))
return MatchOperand_ParseFail;
+ const unsigned *Regs;
+ switch (RegKind) {
+ case GR32Reg: Regs = SystemZMC::GR32Regs; break;
+ case GR64Reg: Regs = SystemZMC::GR64Regs; break;
+ default: llvm_unreachable("invalid RegKind");
+ }
+
switch (MemKind) {
case BDMem:
// If we have Reg1, it must be an address register.
@@ -912,11 +1063,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg1.Num];
}
- // There must be no Reg2 or length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
+ // There must be no Reg2.
if (HaveReg2) {
Error(StartLoc, "invalid use of indexed addressing");
return MatchOperand_ParseFail;
@@ -940,11 +1087,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
case BDLMem:
// If we have Reg2, it must be an address register.
@@ -977,11 +1119,6 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
case BDVMem:
// We must have Reg1, and it must be a vector register.
@@ -996,16 +1133,11 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_ParseFail;
Base = Regs[Reg2.Num];
}
- // There must be no length.
- if (Length) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
break;
}
SMLoc EndLoc =
- SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
Index, Length, LengthReg,
StartLoc, EndLoc));
@@ -1118,15 +1250,15 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
}
// Emit as a regular instruction.
- Parser.getStreamer().EmitInstruction(Inst, getSTI());
+ Parser.getStreamer().emitInstruction(Inst, getSTI());
return false;
}
bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) {
+ SMLoc &EndLoc, bool RestoreOnFailure) {
Register Reg;
- if (parseRegister(Reg))
+ if (parseRegister(Reg, RestoreOnFailure))
return true;
if (Reg.Group == RegGR)
RegNo = SystemZMC::GR64Regs[Reg.Num];
@@ -1143,6 +1275,25 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return false;
}
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
+}
+
+OperandMatchResultTy SystemZAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ bool Result =
+ ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
+ bool PendingErrors = getParser().hasPendingError();
+ getParser().clearPendingErrors();
+ if (PendingErrors)
+ return MatchOperand_ParseFail;
+ if (Result)
+ return MatchOperand_NoMatch;
+ return MatchOperand_Success;
+}
+
bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
@@ -1215,7 +1366,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
bool HaveReg1, HaveReg2;
const MCExpr *Expr;
const MCExpr *Length;
- if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length))
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length,
+ /*HasLength*/ true, /*HasVectorIndex*/ true))
return true;
// If the register combination is not valid for any instruction, reject it.
// Otherwise, fall back to reporting an unrecognized instruction.
@@ -1252,7 +1404,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ Out.emitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
@@ -1322,7 +1474,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
}
int64_t Value = CE->getValue();
MCSymbol *Sym = Ctx.createTempSymbol();
- Out.EmitLabel(Sym);
+ Out.emitLabel(Sym);
const MCExpr *Base = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
Ctx);
Expr = Value == 0 ? Base : MCBinaryExpr::createAdd(Base, Expr, Ctx);
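
The RestoreOnFailure plumbing above exists for the new tryParseRegister() hook: unlike ParseRegister(), it must leave the lexer untouched and produce no pending diagnostics when the operand turns out not to be a register, so a failed attempt UnLex()es the consumed '%' token and the wrapper clears pending errors before returning MatchOperand_NoMatch. A minimal sketch of the idiom, assuming a hypothetical parseBody() for the rest of the parse (UnLex() is the real MCAsmLexer API):

    // On failure, push the already-consumed '%' token back so the lexer is
    // exactly as it was before the attempt; the caller can then report
    // MatchOperand_NoMatch instead of a hard error.
    bool parseRegisterRestoring(MCAsmParser &Parser, Register &Reg) {
      const AsmToken PercentTok = Parser.getTok(); // copy, not reference
      Parser.Lex();                                // consume '%'
      if (parseBody(Parser, Reg)) {                // hypothetical parse body
        Parser.getLexer().UnLex(PercentTok);       // restore lexer state
        return true;
      }
      return false;
    }

The other half of the rewrite lets bare integers stand in for register operands (the vgef %v0, 0(0), 0 case discussed in the comments): since a plain integer carries no prefix, parseIntegerRegister() derives the register group from the operand kind the instruction expects rather than from the spelling.
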
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 5893b227c08c..fac363cae713 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -155,7 +155,8 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
MO.getExpr()->print(O, &MAI);
}
-void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI,
+ uint64_t Address, int OpNum,
raw_ostream &O) {
// Output the PC-relative operand.
printPCRelOperand(MI, OpNum, O);
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 5628e9252f03..cfe1bd89c3eb 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -46,6 +46,10 @@ public:
private:
// Print various types of operand.
void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
+ raw_ostream &O) {
+ printOperand(MI, OpNum, O);
+ }
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
@@ -65,7 +69,12 @@ private:
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelOperand(const MCInst *MI, uint64_t /*Address*/, int OpNum,
+ raw_ostream &O) {
+ printPCRelOperand(MI, OpNum, O);
+ }
+ void printPCRelTLSOperand(const MCInst *MI, uint64_t Address, int OpNum,
+ raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
// This forms part of the instruction name rather than the operand list.
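
These Address-taking overloads track an MC-wide change that threads the instruction's address into the operand printers so disassemblers such as llvm-objdump can render PC-relative operands as absolute targets; SystemZ simply forwards to the Address-less versions for now. A sketch of what an Address-aware variant could do instead (hypothetical name and behavior, assuming the immediate is relative to the instruction's start address, as on SystemZ):

    void printPCRelOperandAsTarget(const MCInst *MI, uint64_t Address,
                                   int OpNum, raw_ostream &O) {
      const MCOperand &MO = MI->getOperand(OpNum);
      if (MO.isImm()) {
        // Resolve the PC-relative offset against the address the
        // disassembler threaded through to the printer.
        uint64_t Target = Address + (int64_t)MO.getImm();
        O << format_hex(Target, 10);
        return;
      }
      MO.getExpr()->print(O, &MAI); // symbolic operand: unchanged
    }
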
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 23d8585095cc..e62f5040898f 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -63,10 +63,6 @@ public:
const MCAsmLayout &Layout) const override {
return false;
}
- void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
- llvm_unreachable("SystemZ does do not have assembler relaxation");
- }
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index d6cdacfcab92..e540ff4e4811 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,6 +23,4 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- UseIntegratedAssembler = true;
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index eb2112674a12..f2ef1ad6c698 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -150,10 +150,9 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT,
const MCTargetOptions &Options) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
- MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr,
- MRI.getDwarfRegNum(SystemZ::R15D, true),
- SystemZMC::CFAOffsetFromInitialSP);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true),
+ SystemZMC::CFAOffsetFromInitialSP);
MAI->addInitialFrameState(Inst);
return MAI;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index 0808160f627c..bedbd061ea5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 67c4aa08f90d..4109bfc11337 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -92,9 +92,9 @@ static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
return;
const MachineMemOperand *MMO = *MI->memoperands_begin();
unsigned AlignmentHint = 0;
- if (MMO->getAlignment() >= 16)
+ if (MMO->getAlign() >= Align(16))
AlignmentHint = 4;
- else if (MMO->getAlignment() >= 8)
+ else if (MMO->getAlign() >= Align(8))
AlignmentHint = 3;
if (AlignmentHint == 0)
return;
@@ -124,7 +124,7 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
.addImm(0);
}
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
switch (MI->getOpcode()) {
@@ -479,7 +479,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// that instead.
case SystemZ::Trap: {
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -492,7 +492,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// to the relative immediate field of the jump instruction. (eg. "jo .+2")
case SystemZ::CondTrap: {
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(DotSym, OutContext);
const MCConstantExpr *ConstExpr = MCConstantExpr::create(2, OutContext);
@@ -522,7 +522,6 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, LoweredMI);
}
-
// Emit the largest nop instruction smaller than or equal to NumBytes
// bytes. Return the size of nop emitted.
static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
@@ -532,22 +531,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer,
return 0;
}
else if (NumBytes < 4) {
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCRAsm)
- .addImm(0).addReg(SystemZ::R0D), STI);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D), STI);
return 2;
}
else if (NumBytes < 6) {
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BCAsm)
- .addImm(0).addReg(0).addImm(0).addReg(0),
- STI);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BCAsm).addImm(0).addReg(0).addImm(0).addReg(0),
+ STI);
return 4;
}
else {
MCSymbol *DotSym = OutContext.createTempSymbol();
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
- OutStreamer.EmitLabel(DotSym);
- OutStreamer.EmitInstruction(MCInstBuilder(SystemZ::BRCLAsm)
- .addImm(0).addExpr(Dot), STI);
+ OutStreamer.emitLabel(DotSym);
+ OutStreamer.emitInstruction(
+ MCInstBuilder(SystemZ::BRCLAsm).addImm(0).addExpr(Dot), STI);
return 6;
}
}
@@ -560,9 +559,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
OutStreamer->PushSection();
OutStreamer->SwitchSection(
Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
- OutStreamer->EmitSymbolValue(DotSym, 8);
+ OutStreamer->emitSymbolValue(DotSym, 8);
OutStreamer->PopSection();
- OutStreamer->EmitLabel(DotSym);
+ OutStreamer->emitLabel(DotSym);
}
if (MF->getFunction().hasFnAttribute("mnop-mcount")) {
@@ -573,8 +572,9 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
const MCSymbolRefExpr *Op =
MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx);
- OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL)
- .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo());
+ OutStreamer->emitInstruction(
+ MCInstBuilder(SystemZ::BRASL).addReg(SystemZ::R0D).addExpr(Op),
+ getSubtargetInfo());
}
void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
@@ -585,7 +585,7 @@ void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordStackMap(*MILabel, MI);
assert(NumNOPBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
@@ -618,7 +618,7 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SystemZMCInstLower &Lower) {
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer->EmitLabel(MILabel);
+ OutStreamer->emitLabel(MILabel);
SM.recordPatchPoint(*MILabel, MI);
PatchPointOpers Opers(&MI);
@@ -685,8 +685,8 @@ getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
llvm_unreachable("Invalid SystemCPModifier!");
}
-void SystemZAsmPrinter::
-EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+void SystemZAsmPrinter::emitMachineConstantPoolValue(
+ MachineConstantPoolValue *MCPV) {
auto *ZCPV = static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
@@ -695,7 +695,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutContext);
uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
- OutStreamer->EmitValue(Expr, Size);
+ OutStreamer->emitValue(Expr, Size);
}
bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -719,7 +719,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
+void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
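
EmitNop() above picks the largest SystemZ nop encoding that fits — a 2-byte BCR, a 4-byte BC, or a 6-byte BRCL — and returns the size it emitted, so padding callers like LowerSTACKMAP reduce to a simple loop. An illustrative padding loop over it (the loop itself is a sketch, not the patch's exact code):

    // SystemZ code is 2-byte aligned, so an even byte count can always be
    // covered exactly by a sequence of 2/4/6-byte nops.
    static void emitNopPadding(MCContext &Ctx, MCStreamer &OS,
                               unsigned NumBytes, const MCSubtargetInfo &STI) {
      assert(NumBytes % 2 == 0 && "Invalid number of NOP bytes requested!");
      while (NumBytes >= 2)
        NumBytes -= EmitNop(Ctx, OS, NumBytes, STI); // emits 2, 4, or 6 bytes
    }
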
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index d01a17c2ebe2..2d7562c7238d 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -32,9 +32,9 @@ public:
// Override AsmPrinter.
StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
- void EmitInstruction(const MachineInstr *MI) override;
- void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
- void EmitEndOfAsmFile(Module &M) override;
+ void emitInstruction(const MachineInstr *MI) override;
+ void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+ void emitEndOfAsmFile(Module &M) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 4432adc6a269..d4c7ce07420b 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -108,7 +108,7 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
// the location (register or stack slot) for the indirect pointer.
// (This duplicates the usual i64 calling convention rules.)
unsigned Reg = State.AllocateReg(SystemZ::ArgGPRs);
- unsigned Offset = Reg ? 0 : State.AllocateStack(8, 8);
+ unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index ffeee4da95cc..86c6b2985385 100644
--- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -25,13 +25,12 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
return new SystemZConstantPoolValue(GV, Modifier);
}
-int SystemZConstantPoolValue::
-getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
+int SystemZConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
if (Constants[I].isMachineConstantPoolEntry() &&
- (Constants[I].getAlignment() & AlignMask) == 0) {
+ Constants[I].getAlign() >= Alignment) {
auto *ZCPV =
static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
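
The rewritten lookup leans on the fact that, for power-of-two alignments, the old mask test and an ordinary >= comparison are the same condition; llvm::Align's comparison operators simply compare the underlying values. A standalone check of that equivalence (plain C++, independent of LLVM):

    #include <cassert>

    int main() {
      // For powers of two, "Entry's low bits below Req are clear" is the
      // same condition as "Entry is at least Req".
      for (unsigned Entry = 1; Entry <= 64; Entry *= 2)
        for (unsigned Req = 1; Req <= 64; Req *= 2)
          assert(((Entry & (Req - 1)) == 0) == (Entry >= Req));
      return 0;
    }
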
diff --git a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 6cb7710abdfe..da610ab45070 100644
--- a/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -43,7 +43,7 @@ public:
// Override MachineConstantPoolValue.
int getExistingMachineCPValue(MachineConstantPool *CP,
- unsigned Alignment) override;
+ Align Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
void print(raw_ostream &O) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
new file mode 100644
index 000000000000..7d21d29d270e
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -0,0 +1,120 @@
+//===---------- SystemZPhysRegCopy.cpp - Handle phys reg copies -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass makes sure that a COPY of a physical register will be
+// implementable after register allocation in copyPhysReg() (this could be
+// done in EmitInstrWithCustomInserter() instead if COPY instructions would
+// be passed to it).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
+
+namespace llvm {
+ void initializeSystemZCopyPhysRegsPass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZCopyPhysRegs : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZCopyPhysRegs()
+ : MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) {
+ initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+
+ bool visitMBB(MachineBasicBlock &MBB);
+
+ const SystemZInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+};
+
+char SystemZCopyPhysRegs::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
+ SYSTEMZ_COPYPHYSREGS_NAME, false, false)
+
+FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
+ return new SystemZCopyPhysRegs();
+}
+
+void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ // Certain special registers can only be copied from a subset of the
+ // default register class of the type. It is therefore necessary to create
+ // the target copy instructions before regalloc instead of in copyPhysReg().
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ) {
+ MachineInstr *MI = &*MBBI++;
+ if (!MI->isCopy())
+ continue;
+
+ DebugLoc DL = MI->getDebugLoc();
+ Register SrcReg = MI->getOperand(1).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ if (DstReg.isVirtual() &&
+ (SrcReg == SystemZ::CC || SystemZ::AR32BitRegClass.contains(SrcReg))) {
+ Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+ if (SrcReg == SystemZ::CC)
+ BuildMI(MBB, MI, DL, TII->get(SystemZ::IPM), Tmp);
+ else
+ BuildMI(MBB, MI, DL, TII->get(SystemZ::EAR), Tmp).addReg(SrcReg);
+ MI->getOperand(1).setReg(Tmp);
+ Modified = true;
+ }
+ else if (SrcReg.isVirtual() &&
+ SystemZ::AR32BitRegClass.contains(DstReg)) {
+ Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
+ MI->getOperand(0).setReg(Tmp);
+ BuildMI(MBB, MBBI, DL, TII->get(SystemZ::SAR), DstReg).addReg(Tmp);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MRI = &F.getRegInfo();
+
+ bool Modified = false;
+ for (auto &MBB : F)
+ Modified |= visitMBB(MBB);
+
+ return Modified;
+}
+
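
In effect, the new pass rewrites, before register allocation, the copies that copyPhysReg() could not lower afterwards: a copy from CC is materialized through IPM, and copies from or to access registers go through a GR32 temporary via EAR or SAR. A condensed restatement of the CC case in visitMBB(), with the before/after shapes spelled out (the helper name is illustrative):

    // Before:  %dst:gr32bit = COPY $cc    <- not lowerable after regalloc
    // After:   %tmp:gr32bit = IPM         <- INSERT PROGRAM MASK reads CC
    //          %dst:gr32bit = COPY %tmp
    void rewriteCCCopy(MachineBasicBlock &MBB, MachineInstr &MI,
                       const SystemZInstrInfo *TII, MachineRegisterInfo *MRI) {
      Register Tmp = MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::IPM), Tmp);
      MI.getOperand(1).setReg(Tmp); // the COPY now reads the GR32 temporary
    }
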
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td
index dae795e845b0..28f58cb310af 100644
--- a/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -10,13 +10,13 @@
//
//===----------------------------------------------------------------------===//
-class SystemZFeature<string extname, string intname, string desc>
- : Predicate<"Subtarget->has"##intname##"()">,
- AssemblerPredicate<"Feature"##intname, extname>,
- SubtargetFeature<extname, "Has"##intname, "true", desc>;
+class SystemZFeature<string extname, string intname, dag featdag, string desc>
+ : Predicate<"Subtarget->has"#intname#"()">,
+ AssemblerPredicate<featdag, extname>,
+ SubtargetFeature<extname, "Has"#intname, "true", desc>;
class SystemZMissingFeature<string intname>
- : Predicate<"!Subtarget->has"##intname##"()">;
+ : Predicate<"!Subtarget->has"#intname#"()">;
class SystemZFeatureList<list<SystemZFeature> x> {
list<SystemZFeature> List = x;
@@ -25,6 +25,13 @@ class SystemZFeatureList<list<SystemZFeature> x> {
class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
: SystemZFeatureList<!listconcat(x, y)>;
+// This feature is added as a subtarget feature whenever the function is
+// compiled to use soft-float.
+def FeatureSoftFloat : SystemZFeature<
+ "soft-float", "SoftFloat", (all_of FeatureSoftFloat),
+ "Use software emulation for floating point"
+>;
+
//===----------------------------------------------------------------------===//
//
// New features added in the Ninth Edition of the z/Architecture
@@ -32,54 +39,54 @@ class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
//===----------------------------------------------------------------------===//
def FeatureDistinctOps : SystemZFeature<
- "distinct-ops", "DistinctOps",
+ "distinct-ops", "DistinctOps", (all_of FeatureDistinctOps),
"Assume that the distinct-operands facility is installed"
>;
def FeatureFastSerialization : SystemZFeature<
- "fast-serialization", "FastSerialization",
+ "fast-serialization", "FastSerialization", (all_of FeatureFastSerialization),
"Assume that the fast-serialization facility is installed"
>;
def FeatureFPExtension : SystemZFeature<
- "fp-extension", "FPExtension",
+ "fp-extension", "FPExtension", (all_of FeatureFPExtension),
"Assume that the floating-point extension facility is installed"
>;
def FeatureHighWord : SystemZFeature<
- "high-word", "HighWord",
+ "high-word", "HighWord", (all_of FeatureHighWord),
"Assume that the high-word facility is installed"
>;
def FeatureInterlockedAccess1 : SystemZFeature<
- "interlocked-access1", "InterlockedAccess1",
+ "interlocked-access1", "InterlockedAccess1", (all_of FeatureInterlockedAccess1),
"Assume that interlocked-access facility 1 is installed"
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
def FeatureLoadStoreOnCond : SystemZFeature<
- "load-store-on-cond", "LoadStoreOnCond",
+ "load-store-on-cond", "LoadStoreOnCond", (all_of FeatureLoadStoreOnCond),
"Assume that the load/store-on-condition facility is installed"
>;
def FeatureNoLoadStoreOnCond : SystemZMissingFeature<"LoadStoreOnCond">;
def FeaturePopulationCount : SystemZFeature<
- "population-count", "PopulationCount",
+ "population-count", "PopulationCount", (all_of FeaturePopulationCount),
"Assume that the population-count facility is installed"
>;
def FeatureMessageSecurityAssist3 : SystemZFeature<
- "message-security-assist-extension3", "MessageSecurityAssist3",
+ "message-security-assist-extension3", "MessageSecurityAssist3", (all_of FeatureMessageSecurityAssist3),
"Assume that the message-security-assist extension facility 3 is installed"
>;
def FeatureMessageSecurityAssist4 : SystemZFeature<
- "message-security-assist-extension4", "MessageSecurityAssist4",
+ "message-security-assist-extension4", "MessageSecurityAssist4", (all_of FeatureMessageSecurityAssist4),
"Assume that the message-security-assist extension facility 4 is installed"
>;
def FeatureResetReferenceBitsMultiple : SystemZFeature<
- "reset-reference-bits-multiple", "ResetReferenceBitsMultiple",
+ "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", (all_of FeatureResetReferenceBitsMultiple),
"Assume that the reset-reference-bits-multiple facility is installed"
>;
@@ -103,37 +110,37 @@ def Arch9NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureExecutionHint : SystemZFeature<
- "execution-hint", "ExecutionHint",
+ "execution-hint", "ExecutionHint", (all_of FeatureExecutionHint),
"Assume that the execution-hint facility is installed"
>;
def FeatureLoadAndTrap : SystemZFeature<
- "load-and-trap", "LoadAndTrap",
+ "load-and-trap", "LoadAndTrap", (all_of FeatureLoadAndTrap),
"Assume that the load-and-trap facility is installed"
>;
def FeatureMiscellaneousExtensions : SystemZFeature<
- "miscellaneous-extensions", "MiscellaneousExtensions",
+ "miscellaneous-extensions", "MiscellaneousExtensions", (all_of FeatureMiscellaneousExtensions),
"Assume that the miscellaneous-extensions facility is installed"
>;
def FeatureProcessorAssist : SystemZFeature<
- "processor-assist", "ProcessorAssist",
+ "processor-assist", "ProcessorAssist", (all_of FeatureProcessorAssist),
"Assume that the processor-assist facility is installed"
>;
def FeatureTransactionalExecution : SystemZFeature<
- "transactional-execution", "TransactionalExecution",
+ "transactional-execution", "TransactionalExecution", (all_of FeatureTransactionalExecution),
"Assume that the transactional-execution facility is installed"
>;
def FeatureDFPZonedConversion : SystemZFeature<
- "dfp-zoned-conversion", "DFPZonedConversion",
+ "dfp-zoned-conversion", "DFPZonedConversion", (all_of FeatureDFPZonedConversion),
"Assume that the DFP zoned-conversion facility is installed"
>;
def FeatureEnhancedDAT2 : SystemZFeature<
- "enhanced-dat-2", "EnhancedDAT2",
+ "enhanced-dat-2", "EnhancedDAT2", (all_of FeatureEnhancedDAT2),
"Assume that the enhanced-DAT facility 2 is installed"
>;
@@ -154,27 +161,27 @@ def Arch10NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureLoadAndZeroRightmostByte : SystemZFeature<
- "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte",
+ "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte", (all_of FeatureLoadAndZeroRightmostByte),
"Assume that the load-and-zero-rightmost-byte facility is installed"
>;
def FeatureLoadStoreOnCond2 : SystemZFeature<
- "load-store-on-cond-2", "LoadStoreOnCond2",
+ "load-store-on-cond-2", "LoadStoreOnCond2", (all_of FeatureLoadStoreOnCond2),
"Assume that the load/store-on-condition facility 2 is installed"
>;
def FeatureMessageSecurityAssist5 : SystemZFeature<
- "message-security-assist-extension5", "MessageSecurityAssist5",
+ "message-security-assist-extension5", "MessageSecurityAssist5", (all_of FeatureMessageSecurityAssist5),
"Assume that the message-security-assist extension facility 5 is installed"
>;
def FeatureDFPPackedConversion : SystemZFeature<
- "dfp-packed-conversion", "DFPPackedConversion",
+ "dfp-packed-conversion", "DFPPackedConversion", (all_of FeatureDFPPackedConversion),
"Assume that the DFP packed-conversion facility is installed"
>;
def FeatureVector : SystemZFeature<
- "vector", "Vector",
+ "vector", "Vector", (all_of FeatureVector),
"Assume that the vectory facility is installed"
>;
def FeatureNoVector : SystemZMissingFeature<"Vector">;
@@ -194,38 +201,38 @@ def Arch11NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureMiscellaneousExtensions2 : SystemZFeature<
- "miscellaneous-extensions-2", "MiscellaneousExtensions2",
+ "miscellaneous-extensions-2", "MiscellaneousExtensions2", (all_of FeatureMiscellaneousExtensions2),
"Assume that the miscellaneous-extensions facility 2 is installed"
>;
def FeatureGuardedStorage : SystemZFeature<
- "guarded-storage", "GuardedStorage",
+ "guarded-storage", "GuardedStorage", (all_of FeatureGuardedStorage),
"Assume that the guarded-storage facility is installed"
>;
def FeatureMessageSecurityAssist7 : SystemZFeature<
- "message-security-assist-extension7", "MessageSecurityAssist7",
+ "message-security-assist-extension7", "MessageSecurityAssist7", (all_of FeatureMessageSecurityAssist7),
"Assume that the message-security-assist extension facility 7 is installed"
>;
def FeatureMessageSecurityAssist8 : SystemZFeature<
- "message-security-assist-extension8", "MessageSecurityAssist8",
+ "message-security-assist-extension8", "MessageSecurityAssist8", (all_of FeatureMessageSecurityAssist8),
"Assume that the message-security-assist extension facility 8 is installed"
>;
def FeatureVectorEnhancements1 : SystemZFeature<
- "vector-enhancements-1", "VectorEnhancements1",
+ "vector-enhancements-1", "VectorEnhancements1", (all_of FeatureVectorEnhancements1),
"Assume that the vector enhancements facility 1 is installed"
>;
def FeatureNoVectorEnhancements1 : SystemZMissingFeature<"VectorEnhancements1">;
def FeatureVectorPackedDecimal : SystemZFeature<
- "vector-packed-decimal", "VectorPackedDecimal",
+ "vector-packed-decimal", "VectorPackedDecimal", (all_of FeatureVectorPackedDecimal),
"Assume that the vector packed decimal facility is installed"
>;
def FeatureInsertReferenceBitsMultiple : SystemZFeature<
- "insert-reference-bits-multiple", "InsertReferenceBitsMultiple",
+ "insert-reference-bits-multiple", "InsertReferenceBitsMultiple", (all_of FeatureInsertReferenceBitsMultiple),
"Assume that the insert-reference-bits-multiple facility is installed"
>;
@@ -246,32 +253,32 @@ def Arch12NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
def FeatureMiscellaneousExtensions3 : SystemZFeature<
- "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+ "miscellaneous-extensions-3", "MiscellaneousExtensions3", (all_of FeatureMiscellaneousExtensions3),
"Assume that the miscellaneous-extensions facility 3 is installed"
>;
def FeatureMessageSecurityAssist9 : SystemZFeature<
- "message-security-assist-extension9", "MessageSecurityAssist9",
+ "message-security-assist-extension9", "MessageSecurityAssist9", (all_of FeatureMessageSecurityAssist9),
"Assume that the message-security-assist extension facility 9 is installed"
>;
def FeatureVectorEnhancements2 : SystemZFeature<
- "vector-enhancements-2", "VectorEnhancements2",
+ "vector-enhancements-2", "VectorEnhancements2", (all_of FeatureVectorEnhancements2),
"Assume that the vector enhancements facility 2 is installed"
>;
def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
- "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+ "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", (all_of FeatureVectorPackedDecimalEnhancement),
"Assume that the vector packed decimal enhancement facility is installed"
>;
def FeatureEnhancedSort : SystemZFeature<
- "enhanced-sort", "EnhancedSort",
+ "enhanced-sort", "EnhancedSort", (all_of FeatureEnhancedSort),
"Assume that the enhanced-sort facility is installed"
>;
def FeatureDeflateConversion : SystemZFeature<
- "deflate-conversion", "DeflateConversion",
+ "deflate-conversion", "DeflateConversion", (all_of FeatureDeflateConversion),
"Assume that the deflate-conversion facility is installed"
>;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 3cdf6bf98ee0..985722fdcab4 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -62,18 +63,6 @@ SystemZFrameLowering::SystemZFrameLowering()
RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
}
-static bool usePackedStack(MachineFunction &MF) {
- bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
- bool IsVarArg = MF.getFunction().isVarArg();
- bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
- bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken();
- if (HasPackedStackAttr && BackChain)
- report_fatal_error("packed-stack with backchain is currently unsupported.");
- return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain &&
- !FramAddressTaken;
-}
-
bool SystemZFrameLowering::
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -87,71 +76,44 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,
unsigned LowGPR = 0;
unsigned HighGPR = SystemZ::R15D;
int StartSPOffset = SystemZMC::CallFrameSize;
- int CurrOffset;
- if (!usePackedStack(MF)) {
- for (auto &CS : CSI) {
- unsigned Reg = CS.getReg();
- int Offset = RegSpillOffsets[Reg];
- if (Offset) {
- if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
- LowGPR = Reg;
- StartSPOffset = Offset;
- }
- Offset -= SystemZMC::CallFrameSize;
- int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
- CS.setFrameIdx(FrameIdx);
- } else
- CS.setFrameIdx(INT32_MAX);
- }
-
- // Save the range of call-saved registers, for use by the
- // prologue/epilogue inserters.
- ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
- if (IsVarArg) {
- // Also save the GPR varargs, if any. R6D is call-saved, so would
- // already be included, but we also need to handle the call-clobbered
- // argument registers.
- unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
- if (FirstGPR < SystemZ::NumArgGPRs) {
- unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
- int Offset = RegSpillOffsets[Reg];
- if (StartSPOffset > Offset) {
- LowGPR = Reg; StartSPOffset = Offset;
- }
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ int Offset = getRegSpillOffset(MF, Reg);
+ if (Offset) {
+ if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
+ LowGPR = Reg;
+ StartSPOffset = Offset;
}
- }
- ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ Offset -= SystemZMC::CallFrameSize;
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
- CurrOffset = -SystemZMC::CallFrameSize;
- } else {
- // Packed stack: put all the GPRs at the top of the Register save area.
- uint32_t LowGR64Num = UINT32_MAX;
- for (auto &CS : CSI) {
- unsigned Reg = CS.getReg();
- if (SystemZ::GR64BitRegClass.contains(Reg)) {
- unsigned GR64Num = SystemZMC::getFirstReg(Reg);
- int Offset = -8 * (15 - GR64Num + 1);
- if (LowGR64Num > GR64Num) {
- LowGR64Num = GR64Num;
- StartSPOffset = SystemZMC::CallFrameSize + Offset;
- }
- int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset);
- CS.setFrameIdx(FrameIdx);
- } else
- CS.setFrameIdx(INT32_MAX);
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
+ if (IsVarArg) {
+ // Also save the GPR varargs, if any. R6D is call-saved, so would
+ // already be included, but we also need to handle the call-clobbered
+ // argument registers.
+ unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ if (FirstGPR < SystemZ::NumArgGPRs) {
+ unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+ int Offset = getRegSpillOffset(MF, Reg);
+ if (StartSPOffset > Offset) {
+ LowGPR = Reg; StartSPOffset = Offset;
+ }
}
- if (LowGR64Num < UINT32_MAX)
- LowGPR = SystemZMC::GR64Regs[LowGR64Num];
-
- // Save the range of call-saved registers, for use by the
- // prologue/epilogue inserters.
- ZFI->setRestoreGPRRegs(LowGPR, HighGPR, StartSPOffset);
- ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
-
- CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0;
}
+ ZFI->setSpillGPRRegs(LowGPR, HighGPR, StartSPOffset);
// Create fixed stack objects for the remaining registers.
+ int CurrOffset = -SystemZMC::CallFrameSize;
+ if (usePackedStack(MF))
+ CurrOffset += StartSPOffset;
+
for (auto &CS : CSI) {
if (CS.getFrameIdx() != INT32_MAX)
continue;
@@ -234,11 +196,9 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
}
}
-bool SystemZFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -296,11 +256,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-bool SystemZFrameLowering::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+bool SystemZFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -358,9 +316,10 @@ void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
- if (!usePackedStack(MF))
- // Always create the full incoming register save area.
+ if (!usePackedStack(MF) || BackChain)
+ // Create the incoming register save area.
getOrCreateFramePointerSaveIndex(MF);
// Get the size of our stack frame to be allocated ...
@@ -382,16 +341,15 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
// are outside the reach of an unsigned 12-bit displacement.
// Create 2 for the case where both addresses in an MVC are
// out of range.
- RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
- RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, Align(8), false));
}
}
// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
static void emitIncrement(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const DebugLoc &DL,
- unsigned Reg, int64_t NumBytes,
+ MachineBasicBlock::iterator &MBBI, const DebugLoc &DL,
+ Register Reg, int64_t NumBytes,
const TargetInstrInfo *TII) {
while (NumBytes) {
unsigned Opcode;
@@ -416,12 +374,39 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
+// Add CFI for the new CFA offset.
+static void buildCFAOffs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, int Offset,
+ const SystemZInstrInfo *ZII) {
+ unsigned CFIIndex = MBB.getParent()->addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
+// Add CFI for the new frame location.
+static void buildDefCFAReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned Reg,
+ const SystemZInstrInfo *ZII) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+}
+
void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- auto *ZII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ auto *ZII = static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@@ -504,19 +489,31 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);
- emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
-
- // Add CFI for the allocation.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ bool FreeProbe = (ZFI->getSpillGPRRegs().GPROffset &&
+ (ZFI->getSpillGPRRegs().GPROffset + StackSize) < ProbeSize);
+ if (!FreeProbe &&
+ MF.getSubtarget().getTargetLowering()->hasInlineStackProbe(MF)) {
+ // Stack probing may involve looping, but splitting the prologue block
+ // is not possible at this point since it would invalidate the
+ // SaveBlocks / RestoreBlocks sets of PEI in the single block function
+ // case. Build a pseudo to be handled later by inlineStackProbe().
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::PROBED_STACKALLOC))
+ .addImm(StackSize);
+ } else {
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+ buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII);
+ }
SPOffsetFromCFA += Delta;
- if (StoreBackchain)
+ if (StoreBackchain) {
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0;
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG))
- .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0)
- .addReg(0);
+ .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D)
+ .addImm(Offset).addReg(0);
+ }
}
if (HasFP) {
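
The FreeProbe test above exploits the GPR store already emitted by the prologue: once the stack pointer has been lowered by StackSize, that store lands GPROffset + StackSize bytes above the new SP, so when that distance is below the probe size it touches the fresh allocation and no separate probe is needed. A standalone sketch of the condition (my reading of it; isFreeProbe is a made-up name, not LLVM API):

    #include <cstdint>

    // The earlier register save acts as an implicit probe when it writes
    // within the first ProbeSize bytes above the new stack pointer.
    bool isFreeProbe(uint64_t GPROffset, uint64_t StackSize, uint64_t ProbeSize) {
      return GPROffset != 0 && GPROffset + StackSize < ProbeSize;
    }
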
@@ -525,11 +522,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SystemZ::R15D);
// Add CFI for the new frame location.
- unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
- BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ buildDefCFAReg(MBB, MBBI, DL, SystemZ::R11D, ZII);
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
@@ -560,7 +553,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for this save.
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned IgnoredFrameReg;
+ Register IgnoredFrameReg;
int64_t Offset =
getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
@@ -622,6 +615,91 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
+ const SystemZTargetLowering &TLI = *STI.getTargetLowering();
+
+ MachineInstr *StackAllocMI = nullptr;
+ for (MachineInstr &MI : PrologMBB)
+ if (MI.getOpcode() == SystemZ::PROBED_STACKALLOC) {
+ StackAllocMI = &MI;
+ break;
+ }
+ if (StackAllocMI == nullptr)
+ return;
+ uint64_t StackSize = StackAllocMI->getOperand(0).getImm();
+ const unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ uint64_t NumFullBlocks = StackSize / ProbeSize;
+ uint64_t Residual = StackSize % ProbeSize;
+ int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+ MachineBasicBlock *MBB = &PrologMBB;
+ MachineBasicBlock::iterator MBBI = StackAllocMI;
+ const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+ // Allocate a block of Size bytes on the stack and probe it.
+ auto allocateAndProbe = [&](MachineBasicBlock &InsMBB,
+ MachineBasicBlock::iterator InsPt, unsigned Size,
+ bool EmitCFI) -> void {
+ emitIncrement(InsMBB, InsPt, DL, SystemZ::R15D, -int64_t(Size), ZII);
+ if (EmitCFI) {
+ SPOffsetFromCFA -= Size;
+ buildCFAOffs(InsMBB, InsPt, DL, SPOffsetFromCFA, ZII);
+ }
+ // Probe by means of a volatile compare.
+ MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+ BuildMI(InsMBB, InsPt, DL, ZII->get(SystemZ::CG))
+ .addReg(SystemZ::R0D, RegState::Undef)
+ .addReg(SystemZ::R15D).addImm(Size - 8).addReg(0)
+ .addMemOperand(MMO);
+ };
+
+ if (NumFullBlocks < 3) {
+ // Emit unrolled probe statements.
+ for (unsigned int i = 0; i < NumFullBlocks; i++)
+ allocateAndProbe(*MBB, MBBI, ProbeSize, true/*EmitCFI*/);
+ } else {
+ // Emit a loop probing the pages.
+ uint64_t LoopAlloc = ProbeSize * NumFullBlocks;
+ SPOffsetFromCFA -= LoopAlloc;
+
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D)
+ .addReg(SystemZ::R15D);
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII);
+ emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII);
+ buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc),
+ ZII);
+
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MBBI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB);
+ MBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+
+ MBB = LoopMBB;
+ allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR))
+ .addReg(SystemZ::R15D).addReg(SystemZ::R1D);
+ BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB);
+
+ MBB = DoneMBB;
+ MBBI = DoneMBB->begin();
+ buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R15D, ZII);
+
+ recomputeLiveIns(*DoneMBB);
+ recomputeLiveIns(*LoopMBB);
+ }
+
+ if (Residual)
+ allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/);
+
+ StackAllocMI->eraseFromParent();
+}
+
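
inlineStackProbe() splits the allocation into full probe-size blocks plus a residual: fewer than three blocks are emitted unrolled, anything larger becomes the R1/R15 compare-and-branch loop built above, and each block is touched via the volatile CG against its last doubleword. A small sketch of the arithmetic with assumed sizes:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t StackSize = 17000, ProbeSize = 4096;   // example values
      uint64_t NumFullBlocks = StackSize / ProbeSize; // 4 -> probing loop
      uint64_t Residual = StackSize % ProbeSize;      // 616 -> one final probe
      std::printf("%llu full blocks + %llu residual bytes\n",
                  (unsigned long long)NumFullBlocks,
                  (unsigned long long)Residual);
      return 0;
    }
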
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||
@@ -639,7 +717,7 @@ SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
// Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so
// add that difference here.
int64_t Offset =
@@ -664,14 +742,43 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
}
}
+unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
+ Register Reg) const {
+ bool IsVarArg = MF.getFunction().isVarArg();
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+ bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ unsigned Offset = RegSpillOffsets[Reg];
+ if (usePackedStack(MF) && !(IsVarArg && !SoftFloat)) {
+ if (SystemZ::GR64BitRegClass.contains(Reg))
+ // Put all GPRs at the top of the Register save area with packed
+ // stack. Make room for the backchain if needed.
+ Offset += BackChain ? 24 : 32;
+ else
+ Offset = 0;
+ }
+ return Offset;
+}
+
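
A worked example of the packed-stack adjustment, assuming r15's regular save slot sits at offset 120 in the 160-byte call frame (the offset I recall from RegSpillOffsets; treat the numbers as illustrative):

    #include <cstdio>

    int main() {
      unsigned Regular = 120;            // RegSpillOffsets[r15], assumed
      unsigned NoChain = Regular + 32;   // 152: topmost 8 bytes of the frame
      unsigned WithChain = Regular + 24; // 144: frees 152..159 for the chain
      std::printf("%u %u\n", NoChain, WithChain);
      return 0;
    }

Without a back chain the GPRs end flush with the top of the frame; with one they shift down by 8 so the back chain itself can sit topmost.
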
int SystemZFrameLowering::
getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
int FI = ZFI->getFramePointerSaveIndex();
if (!FI) {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- FI = MFFrame.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ // The back chain is stored topmost with packed-stack.
+ int Offset = usePackedStack(MF) ? -8 : -SystemZMC::CallFrameSize;
+ FI = MFFrame.CreateFixedObject(8, Offset, false);
ZFI->setFramePointerSaveIndex(FI);
}
return FI;
}
+
+bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
+ bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
+ bool BackChain = MF.getFunction().hasFnAttribute("backchain");
+ bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
+ if (HasPackedStackAttr && BackChain && !SoftFloat)
+ report_fatal_error("packed-stack + backchain + hard-float is unsupported.");
+ bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
+ return HasPackedStackAttr && CallConv;
+}
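
The new rule, distilled into a sketch (not the LLVM code; packedStack is a made-up name): packed-stack now coexists with backchain, only the hard-float flavor of that combination is rejected, and GHC-convention functions never use the packed layout.

    #include <stdexcept>

    bool packedStack(bool Attr, bool BackChain, bool SoftFloat, bool IsGHC) {
      if (Attr && BackChain && !SoftFloat)
        throw std::runtime_error("packed-stack + backchain + hard-float");
      return Attr && !IsGHC;
    }
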
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 4189a92b8294..8752acc7e5ae 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -32,33 +32,36 @@ public:
RegScavenger *RS) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
+ ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBII,
- std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const
- override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBII,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
// Return the byte offset from the incoming stack pointer of Reg's
- // ABI-defined save slot. Return 0 if no slot is defined for Reg.
- unsigned getRegSpillOffset(unsigned Reg) const {
- return RegSpillOffsets[Reg];
- }
+ // ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
+ // the offset in case MF has packed-stack.
+ unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const;
// Get or create the frame index of where the old frame pointer is stored.
int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
+
+ bool usePackedStack(MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 3927a977e6fc..37328684399b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1456,7 +1456,8 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
auto *StoreA = cast<StoreSDNode>(N);
auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
- return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+ return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() &&
+ canUseBlockOperation(StoreA, LoadB);
}
void SystemZDAGToDAGISel::Select(SDNode *Node) {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73905d3357a..eb1e51341ec4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -88,25 +88,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
- } else {
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
- }
- if (Subtarget.hasVectorEnhancements1())
- addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
- else
- addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (!useSoftFloat()) {
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
+ if (Subtarget.hasVectorEnhancements1())
+ addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
+ else
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
- if (Subtarget.hasVector()) {
- addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
- addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
}
// Compute derived properties from the register classes
@@ -639,12 +641,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
setTargetDAGCombine(ISD::SREM);
setTargetDAGCombine(ISD::UREM);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -666,6 +672,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
IsStrictFPEnabled = true;
}
+bool SystemZTargetLowering::useSoftFloat() const {
+ return Subtarget.hasSoftFloat();
+}
+
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &, EVT VT) const {
if (!VT.isVector())
@@ -816,6 +826,15 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
+/// Returns true if stack probing through inline assembly is requested.
+bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
+ // If the function specifically requests inline stack probes, emit them.
+ if (MF.getFunction().hasFnAttribute("probe-stack"))
+ return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
+ "inline-asm";
+ return false;
+}
+
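
The attribute test distilled: inline probing is requested solely through the IR function attribute "probe-stack"="inline-asm" (to my knowledge clang sets this under -fstack-clash-protection). A hypothetical standalone form:

    #include <string>

    // Sketch of the check above; the real code reads the IR attribute.
    bool wantsInlineStackProbe(const std::string *ProbeStackAttr) {
      return ProbeStackAttr != nullptr && *ProbeStackAttr == "inline-asm";
    }
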
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// We can use CGFI or CLGFI.
return isInt<32>(Imm) || isUInt<32>(Imm);
@@ -1123,12 +1142,14 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
case 'f': // Floating-point register
- if (VT == MVT::f64)
- return std::make_pair(0U, &SystemZ::FP64BitRegClass);
- else if (VT == MVT::f128)
- return std::make_pair(0U, &SystemZ::FP128BitRegClass);
- return std::make_pair(0U, &SystemZ::FP32BitRegClass);
-
+ if (!useSoftFloat()) {
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+ return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+ }
+ break;
case 'v': // Vector register
if (Subtarget.hasVector()) {
if (VT == MVT::f32)
@@ -1156,6 +1177,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::GR64Regs, 16);
}
if (Constraint[1] == 'f') {
+ if (useSoftFloat())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
@@ -1166,6 +1190,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
SystemZMC::FP64Regs, 16);
}
if (Constraint[1] == 'v') {
+ if (!Subtarget.hasVector())
+ return std::make_pair(
+ 0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
@@ -1179,6 +1206,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+
+ Register Reg = StringSwitch<Register>(RegName)
+ .Case("r15", SystemZ::R15D)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
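
getRegisterByName() backs named-register reads such as GNU global register variables, and only r15 (the stack pointer) is accepted here. A usage sketch (GNU extension, compiled for s390x; as far as I can tell this lowers to llvm.read_register and hence reaches this hook):

    // Reads the stack pointer directly through the named register.
    register unsigned long StackPointer asm("r15");

    unsigned long currentSP() { return StackPointer; }
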
void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
@@ -1437,17 +1477,19 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
- int64_t RegSaveOffset = -SystemZMC::CallFrameSize;
+ int64_t RegSaveOffset =
+ -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
// Store the FPR varargs in the reserved frame slots. (We store the
// GPRs as part of the prologue.)
- if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+ if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) {
SDValue MemOps[SystemZ::NumArgFPRs];
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
- unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
- int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
+ unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]);
+ int FI =
+ MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
@@ -1633,6 +1675,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall)
return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -2020,8 +2063,9 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
// We must have an 8- or 16-bit load.
auto *Load = cast<LoadSDNode>(C.Op0);
- unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
- if (NumBits != 8 && NumBits != 16)
+ unsigned NumBits = Load->getMemoryVT().getSizeInBits();
+ if ((NumBits != 8 && NumBits != 16) ||
+ NumBits != Load->getMemoryVT().getStoreSizeInBits())
return;
// The load must be an extending one and the constant must be within the
@@ -2161,15 +2205,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
return false;
}
-// Return a version of comparison CC mask CCMask in which the LT and GT
-// actions are swapped.
-static unsigned reverseCCMask(unsigned CCMask) {
- return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
- (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
- (CCMask & SystemZ::CCMASK_CMP_UO));
-}
-
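
The removed helper now lives in the SystemZ namespace (see the SystemZInstrInfo changes), so the uses below call SystemZ::reverseCCMask instead. Its effect, sketched standalone with the CC mask values as I recall them (CC0..CC3 = 8, 4, 2, 1):

    enum : unsigned {
      CCMASK_CMP_EQ = 8, // CC0
      CCMASK_CMP_LT = 4, // CC1
      CCMASK_CMP_GT = 2, // CC2
      CCMASK_CMP_UO = 1  // CC3
    };

    // Swap the LT and GT conditions; EQ and unordered pass through.
    unsigned reverseCCMask(unsigned CCMask) {
      return (CCMask & CCMASK_CMP_EQ) |
             ((CCMask & CCMASK_CMP_GT) ? CCMASK_CMP_LT : 0u) |
             ((CCMask & CCMASK_CMP_LT) ? CCMASK_CMP_GT : 0u) |
             (CCMask & CCMASK_CMP_UO);
    }
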
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
@@ -2205,7 +2240,7 @@ static void adjustForFNeg(Comparison &C) {
SDNode *N = *I;
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
return;
}
}
@@ -2572,7 +2607,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
if (shouldSwapCmpOperands(C)) {
std::swap(C.Op0, C.Op1);
- C.CCMask = reverseCCMask(C.CCMask);
+ C.CCMask = SystemZ::reverseCCMask(C.CCMask);
}
adjustForTestUnderMask(DAG, DL, C);
@@ -3103,7 +3138,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3118,7 +3153,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3136,7 +3171,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
// Add the per-symbol offset.
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
- SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
DTPOffset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), DTPOffset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3161,7 +3196,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue *CPV =
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
- Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
Offset = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
@@ -3202,11 +3237,11 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue Result;
if (CP->isMachineConstantPoolEntry())
- Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ Result =
+ DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
else
- Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
+ CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -3214,6 +3249,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -3222,9 +3259,12 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Return null if the back chain is not present.
+ bool HasBackChain = MF.getFunction().hasFnAttribute("backchain");
+ if (TFL->usePackedStack(MF) && !HasBackChain)
+ return DAG.getConstant(0, DL, PtrVT);
+
// By definition, the frame address is the address of the back chain.
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
@@ -3355,9 +3395,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
- /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
- /*isTailCall*/false,
- MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+ Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
+ /*isTailCall*/ false, MachinePointerInfo(DstSV),
+ MachinePointerInfo(SrcSV));
}
SDValue SystemZTargetLowering::
@@ -3398,10 +3438,17 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
-
- // Copy the new stack pointer back.
- Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ SDValue NewSP;
+ if (hasInlineStackProbe(MF)) {
+ NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
+ DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
+ Chain = NewSP.getValue(1);
+ } else {
+ NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+ }
// The allocated data lives above the 160 bytes allocated for the standard
// frame, plus any outgoing stack arguments. We don't know how much that
@@ -3995,7 +4042,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
}
MachineMemOperand::Flags
-SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
+SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Because of how we convert atomic_load and atomic_store to normal loads and
// stores in the DAG, we need to ensure that the MMOs are marked volatile
// since DAGCombine hasn't been updated to account for atomic, but non
@@ -4362,7 +4409,7 @@ static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
-// undefined bytes. Return true if it can be performed using VSLDI.
+// undefined bytes. Return true if it can be performed using VSLDB.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
@@ -4420,23 +4467,86 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
return Op;
}
+static bool isZeroVector(SDValue N) {
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+ if (N->getOpcode() == ISD::SPLAT_VECTOR)
+ if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
+ return Op->getZExtValue() == 0;
+ return ISD::isBuildVectorAllZeros(N.getNode());
+}
+
+// Return the index of the zero/undef vector, or UINT32_MAX if not found.
+static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
+ for (unsigned I = 0; I < Num; I++)
+ if (isZeroVector(Ops[I]))
+ return I;
+ return UINT32_MAX;
+}
+
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
-// VSLDI or VPERM.
+// VSLDB or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
SDValue *Ops,
const SmallVectorImpl<int> &Bytes) {
for (unsigned I = 0; I < 2; ++I)
Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
- // First see whether VSLDI can be used.
+ // First see whether VSLDB can be used.
unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
Ops[OpNo1],
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
- // Fall back on VPERM. Construct an SDNode for the permute vector.
+ // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
+ // eliminate a zero vector by reusing any zero index in the permute vector.
+ unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
+ if (ZeroVecIdx != UINT32_MAX) {
+ bool MaskFirst = true;
+ int ZeroIdx = -1;
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx && I == 0) {
+ // If the first byte is zero, use mask as first operand.
+ ZeroIdx = 0;
+ break;
+ }
+ if (OpNo != ZeroVecIdx && Byte == 0) {
+ // If mask contains a zero, use it by placing that vector first.
+ ZeroIdx = I + SystemZ::VectorBytes;
+ MaskFirst = false;
+ break;
+ }
+ }
+ if (ZeroIdx != -1) {
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
+ if (OpNo == ZeroVecIdx)
+ IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
+ else {
+ unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
+ IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
+ }
+ } else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ }
+ SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
+ SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
+ if (MaskFirst)
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
+ Mask);
+ else
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
+ Mask);
+ }
+ }
+
SDValue IndexNodes[SystemZ::VectorBytes];
for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
if (Bytes[I] >= 0)
@@ -4444,16 +4554,20 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
- return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
+ (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
- GeneralShuffle(EVT vt) : VT(vt) {}
+ GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
void addUndef();
bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
+ void tryPrepareForUnpack();
+ bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
+ SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
// The operands of the shuffle.
SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -4465,6 +4579,9 @@ struct GeneralShuffle {
// The type of the shuffle result.
EVT VT;
+
+ // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
+ unsigned UnpackFromEltSize;
};
}
@@ -4547,6 +4664,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
if (Ops.size() == 0)
return DAG.getUNDEF(VT);
+ // Use a single unpack if possible as the last operation.
+ tryPrepareForUnpack();
+
// Make sure that there are at least two shuffle operands.
if (Ops.size() == 1)
Ops.push_back(DAG.getUNDEF(MVT::v16i8));
@@ -4612,13 +4732,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
// to VPERM.
unsigned OpNo0, OpNo1;
SDValue Op;
- if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ if (unpackWasPrepared() && Ops[1].isUndef())
+ Op = Ops[0];
+ else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
else
Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+
+ Op = insertUnpackIfPrepared(DAG, DL, Op);
+
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+#ifndef NDEBUG
+static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
+ dbgs() << Msg.c_str() << " { ";
+ for (unsigned i = 0; i < Bytes.size(); i++)
+ dbgs() << Bytes[i] << " ";
+ dbgs() << "}\n";
+}
+#endif
+
+// If the Bytes vector matches an unpack operation, prepare to do the unpack
+// after all else by removing the zero vector and the effect of the unpack on
+// Bytes.
+void GeneralShuffle::tryPrepareForUnpack() {
+ uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
+ if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
+ return;
+
+ // Only do this if removing the zero vector reduces the depth, otherwise
+ // the critical path will increase with the final unpack.
+ if (Ops.size() > 2 &&
+ Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
+ return;
+
+ // Find an unpack that would allow removing the zero vector from Ops.
+ UnpackFromEltSize = 1;
+ for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
+ bool MatchUnpack = true;
+ SmallVector<int, SystemZ::VectorBytes> SrcBytes;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
+ unsigned ToEltSize = UnpackFromEltSize * 2;
+ bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
+ if (!IsZextByte)
+ SrcBytes.push_back(Bytes[Elt]);
+ if (Bytes[Elt] != -1) {
+ unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
+ if (IsZextByte != (OpNo == ZeroVecOpNo)) {
+ MatchUnpack = false;
+ break;
+ }
+ }
+ }
+ if (MatchUnpack) {
+ if (Ops.size() == 2) {
+ // Don't use unpack if a single source operand needs rearrangement.
+ for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
+ if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
+ UnpackFromEltSize = UINT_MAX;
+ return;
+ }
+ }
+ break;
+ }
+ }
+ if (UnpackFromEltSize > 4)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
+ << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
+ << ".\n";
+ dumpBytes(Bytes, "Original Bytes vector:"););
+
+ // Apply the unpack in reverse to the Bytes array.
+ unsigned B = 0;
+ for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
+ Elt += UnpackFromEltSize;
+ for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
+ Bytes[B] = Bytes[Elt];
+ }
+ while (B < SystemZ::VectorBytes)
+ Bytes[B++] = -1;
+
+ // Remove the zero vector from Ops
+ Ops.erase(&Ops[ZeroVecOpNo]);
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0) {
+ unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+ if (OpNo > ZeroVecOpNo)
+ Bytes[I] -= SystemZ::VectorBytes;
+ }
+
+ LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
+ dbgs() << "\n";);
+}
+
+SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
+ const SDLoc &DL,
+ SDValue Op) {
+ if (!unpackWasPrepared())
+ return Op;
+ unsigned InBits = UnpackFromEltSize * 8;
+ EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
+ SystemZ::VectorBits / InBits);
+ SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
+ unsigned OutBits = InBits * 2;
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
+ SystemZ::VectorBits / OutBits);
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
+}
+
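
A worked example of "apply the unpack in reverse" for UnpackFromEltSize == 1 with the zero vector as operand 1: every first (big-endian high) byte of each 2-byte element is a zext byte and is dropped, the surviving source bytes are compacted to the front, and the tail becomes undef. A standalone sketch of that Bytes rewrite:

    #include <cstdio>

    int main() {
      // Bytes for a byte->halfword zero extend; op#1 (indices 16..31) is zero.
      int Bytes[16], B = 0;
      for (int I = 0; I < 16; I++)
        Bytes[I] = (I % 2 == 0) ? 16 + I / 2 : I / 2;
      const unsigned FromSize = 1;
      for (unsigned Elt = 0; Elt < 16;) {
        Elt += FromSize;                        // skip the zext byte(s)
        for (unsigned I = 0; I < FromSize; I++, Elt++, B++)
          Bytes[B] = Bytes[Elt];
      }
      while (B < 16)
        Bytes[B++] = -1;
      for (int V : Bytes)
        std::printf("%d ", V);                  // 0 1 ... 7 then eight -1s
      return 0;
    }
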
// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
@@ -5013,9 +5237,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
-SDValue
-SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const {
+SDValue SystemZTargetLowering::
+lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
@@ -5025,11 +5248,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
FromBits *= 2;
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
SystemZ::VectorBits / FromBits);
- PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+ PackedOp =
+ DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
} while (FromBits != ToBits);
return PackedOp;
}
+// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
+SDValue SystemZTargetLowering::
+lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue PackedOp = Op.getOperand(0);
+ SDLoc DL(Op);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned OutNumElts = OutVT.getVectorNumElements();
+ unsigned NumInPerOut = InNumElts / OutNumElts;
+
+ SDValue ZeroVec =
+ DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
+
+ SmallVector<int, 16> Mask(InNumElts);
+ unsigned ZeroVecElt = InNumElts;
+ for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
+ unsigned MaskElt = PackedElt * NumInPerOut;
+ unsigned End = MaskElt + NumInPerOut - 1;
+ for (; MaskElt < End; MaskElt++)
+ Mask[MaskElt] = ZeroVecElt++;
+ Mask[MaskElt] = PackedElt;
+ }
+ SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
+}
+
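
A worked example of the mask construction for v4i32 -> v2i64 (big-endian): InNumElts = 4, OutNumElts = 2, the zero vector's elements are numbered from 4, and the loop yields {4, 0, 5, 1}, i.e. zeros in the high half of each output element:

    #include <cstdio>

    int main() {
      unsigned In = 4, Out = 2, NumInPerOut = In / Out, ZeroVecElt = In;
      int Mask[4];
      for (unsigned PackedElt = 0; PackedElt < Out; PackedElt++) {
        unsigned MaskElt = PackedElt * NumInPerOut;
        unsigned End = MaskElt + NumInPerOut - 1;
        for (; MaskElt < End; MaskElt++)
          Mask[MaskElt] = ZeroVecElt++;
        Mask[MaskElt] = PackedElt;
      }
      for (int V : Mask)
        std::printf("%d ", V);   // prints: 4 0 5 1
      return 0;
    }
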
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -5195,9 +5446,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
@@ -5315,6 +5566,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
+ OPCODE(PROBED_ALLOCA);
OPCODE(POPCNT);
OPCODE(SMUL_LOHI);
OPCODE(UMUL_LOHI);
@@ -6056,6 +6308,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINT_TO_FP(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ if (DCI.Level != BeforeLegalizeTypes)
+ return SDValue();
+ unsigned Opcode = N->getOpcode();
+ EVT OutVT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+ unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
+
+ // Insert an extension before type-legalization to avoid scalarization, e.g.:
+ // v2f64 = uint_to_fp v2i16
+ // =>
+ // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
+ if (OutVT.isVector() && OutScalarBits > InScalarBits) {
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
+ OutVT.getVectorNumElements());
+ unsigned ExtOpcode =
+ (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
+ return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6243,15 +6521,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
return false;
// Compute the effective CC mask for the new branch or select.
- switch (CCMask) {
- case SystemZ::CCMASK_CMP_EQ: break;
- case SystemZ::CCMASK_CMP_NE: break;
- case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
- case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
- case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
- case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
- default: return false;
- }
+ CCMask = SystemZ::reverseCCMask(CCMask);
// Return the updated CCReg link.
CCReg = IPM->getOperand(0);
@@ -6367,6 +6637,34 @@ SDValue SystemZTargetLowering::combineIntDIVREM(
return SDValue();
}
+SDValue SystemZTargetLowering::combineINTRINSIC(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (Id) {
+ // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
+ // or larger is simply a vector load.
+ case Intrinsic::s390_vll:
+ case Intrinsic::s390_vlrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
+ N->getOperand(3), MachinePointerInfo());
+ break;
+ // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
+ case Intrinsic::s390_vstl:
+ case Intrinsic::s390_vstrl:
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (C->getZExtValue() >= 15)
+ return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
+ N->getOperand(4), MachinePointerInfo());
+ break;
+ }
+
+ return SDValue();
+}
+
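
The fold follows from the instruction semantics: VECTOR LOAD/STORE (RIGHTMOST) WITH LENGTH takes the highest byte index to access, so an index of 15 or more covers all 16 bytes and the intrinsic is just an unaligned vector load or store. A sketch of the equivalence (vllFull is a made-up name):

    #include <cstring>

    // What s390_vll with a length operand >= 15 amounts to.
    void vllFull(unsigned char Dst[16], const unsigned char *Src) {
      std::memcpy(Dst, Src, 16); // plain 16-byte load
    }
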
SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
return N->getOperand(0);
@@ -6391,6 +6689,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
@@ -6399,6 +6699,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: return combineIntDIVREM(N, DCI);
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
}
return SDValue();
@@ -6580,7 +6882,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
- Known = Known.zext(BitWidth, true);
+ Known = Known.zext(BitWidth);
} else
Known = Known.sext(BitWidth);
break;
@@ -6609,7 +6911,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Known has the width of the source operand(s). Adjust if needed to match
// the passed bitwidth.
if (Known.getBitWidth() != BitWidth)
- Known = Known.zextOrTrunc(BitWidth, false);
+ Known = Known.anyextOrTrunc(BitWidth);
}
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
@@ -6690,38 +6992,29 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+unsigned
+SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+ assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
+ "Unexpected stack alignment");
+ // The default stack probe size is 4096 if the function has no
+ // stack-probe-size attribute.
+ unsigned StackProbeSize = 4096;
+ const Function &Fn = MF.getFunction();
+ if (Fn.hasFnAttribute("stack-probe-size"))
+ Fn.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ // Round down to the stack alignment.
+ StackProbeSize &= ~(StackAlign - 1);
+ return StackProbeSize ? StackProbeSize : StackAlign;
+}
+
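
An example of the rounding: with an 8-byte stack alignment and "stack-probe-size"="16390" on the function, the size rounds down to 16384; a value that rounds to zero falls back to the alignment itself:

    #include <cstdio>

    int main() {
      unsigned StackAlign = 8;
      unsigned StackProbeSize = 16390;     // from the function attribute
      StackProbeSize &= ~(StackAlign - 1); // round down -> 16384
      if (StackProbeSize == 0)
        StackProbeSize = StackAlign;
      std::printf("%u\n", StackProbeSize);
      return 0;
    }
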
//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//
-// Create a new basic block after MBB.
-static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
- MachineFunction &MF = *MBB->getParent();
- MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
- MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
- return NewMBB;
-}
-
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
-// Split MBB before MI and return the new block (the one that contains MI).
-static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
- MachineBasicBlock *MBB) {
- MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
- NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
- return NewMBB;
-}
-
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
@@ -6859,8 +7152,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
for (MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
NextMIIt != MBB->end(); ++NextMIIt) {
- if (NextMIIt->definesRegister(SystemZ::CC))
- break;
if (isSelectPseudo(*NextMIIt)) {
assert(NextMIIt->getOperand(3).getImm() == CCValid &&
"Bad CCValid operands since CC was not redefined.");
@@ -6871,6 +7162,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
}
break;
}
+ if (NextMIIt->definesRegister(SystemZ::CC) ||
+ NextMIIt->usesCustomInsertionHook())
+ break;
bool User = false;
for (auto SelMI : Selects)
if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
@@ -6891,8 +7185,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
bool CCKilled =
(LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the last Select instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -6985,8 +7279,8 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
// Unless CC was killed in the CondStore instruction, mark it as
// live-in to both FalseMBB and JoinMBB.
@@ -7069,8 +7363,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// ...
@@ -7187,10 +7481,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
- MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
// StartMBB:
// ...
@@ -7298,9 +7592,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
// StartMBB:
// ...
@@ -7460,7 +7754,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
- splitBlockAfter(MI, MBB) : nullptr);
+ SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
@@ -7484,9 +7778,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
- MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB =
+ (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7602,7 +7897,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
// If there's another CLC to go, branch to the end if a difference
// was found.
if (EndMBB && Length > 0) {
- MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
.addMBB(EndMBB);
@@ -7642,8 +7937,8 @@ MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
- MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
// StartMBB:
// # fall through to LoopMBB
@@ -7754,6 +8049,97 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
return MBB;
}
+MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
+ MachineInstr &MI, MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI.getDebugLoc();
+ const unsigned ProbeSize = getStackProbeSize(MF);
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SizeReg = MI.getOperand(2).getReg();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
+ MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
+ MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
+
+ MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
+
+ Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+
+ // LoopTestMBB
+ // BRC TailTestMBB
+ // # fallthrough to LoopBodyMBB
+ StartMBB->addSuccessor(LoopTestMBB);
+ MBB = LoopTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
+ .addReg(SizeReg)
+ .addMBB(StartMBB)
+ .addReg(IncReg)
+ .addMBB(LoopBodyMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(TailTestMBB);
+ MBB->addSuccessor(LoopBodyMBB);
+ MBB->addSuccessor(TailTestMBB);
+
+ // LoopBodyMBB: Allocate and probe by means of a volatile compare.
+ // J LoopTestMBB
+ MBB = LoopBodyMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
+ .addReg(PHIReg)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addImm(ProbeSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
+ .setMemRefs(VolLdMMO);
+ BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
+ MBB->addSuccessor(LoopTestMBB);
+
+ // TailTestMBB
+ // BRC DoneMBB
+ // # fallthrough to TailMBB
+ MBB = TailTestMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(PHIReg)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
+ .addMBB(DoneMBB);
+ MBB->addSuccessor(TailMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ // TailMBB
+ // # fallthrough to DoneMBB
+ MBB = TailMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
+ .addReg(SystemZ::R15D)
+ .addReg(PHIReg);
+ BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
+ .setMemRefs(VolLdMMO);
+ MBB->addSuccessor(DoneMBB);
+
+ // DoneMBB
+ MBB = DoneMBB;
+ BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(SystemZ::R15D);
+
+ MI.eraseFromParent();
+ return DoneMBB;
+}
+
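[Annotation: emitProbedAlloca expands the PROBED_ALLOCA pseudo into a loop that never moves %r15 by more than the probe size without touching the newly exposed stack. A rough C++ model of the emitted control flow (a sketch following the block names in the comments above, not LLVM code; probe() stands in for the volatile CG load):

    #include <cstdint>

    // Touch the new chunk so the OS guard page is always hit.
    static void probe(volatile const char *addr) { (void)*addr; }

    char *probedAlloca(char *sp, uint64_t size, uint64_t probeSize) {
      uint64_t remaining = size;          // PHIReg
      while (remaining >= probeSize) {    // LoopTestMBB: CLGFI + BRC
        remaining -= probeSize;           // LoopBodyMBB: SLGFI IncReg
        sp -= probeSize;                  //              SLGFI %r15
        probe(sp + probeSize - 8);        //              volatile CG load
      }
      if (remaining != 0) {               // TailTestMBB: CGHI + BRC
        sp -= remaining;                  // TailMBB:     SLGR %r15
        probe(sp + remaining - 8);        //              volatile CG load
      }
      return sp;                          // DoneMBB: COPY into DstReg
    }
]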
MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
@@ -8014,6 +8400,9 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::LTXBRCompare_VecPseudo:
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+ case SystemZ::PROBED_ALLOCA:
+ return emitProbedAlloca(MI, MBB);
+
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, MBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index defcaa6eb6eb..27637762296a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -83,6 +83,10 @@ enum NodeType : unsigned {
// base of the dynamically-allocatable area.
ADJDYNALLOC,
+  // For allocating stack space when using the stack clash protector.
+  // Allocation is performed in blocks, and each block is probed.
+ PROBED_ALLOCA,
+
// Count number of bits set in operand 0 per byte.
POPCNT,
@@ -393,6 +397,8 @@ public:
explicit SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI);
+ bool useSoftFloat() const override;
+
// Override TargetLowering.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
@@ -426,6 +432,7 @@ public:
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
+ bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -437,6 +444,14 @@ public:
bool *Fast) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
+
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override {
+ // Form add and sub with overflow intrinsics regardless of any extra
+ // users of the math result.
+ return VT == MVT::i32 || VT == MVT::i64;
+ }
+
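[Annotation: SystemZ adds and subtracts set CC as a by-product, so the overflow flag comes essentially for free and forming the overflow op stays profitable even when the plain result has other users. A compiler-agnostic illustration (a sketch; assumes the GCC/Clang __builtin_add_overflow builtin, and exact codegen is not guaranteed):

    #include <cstdint>
    #include <cstdio>

    // Both `sum` and `overflow` can come from one CC-setting add (e.g. ALGR).
    bool addAndPrint(uint64_t a, uint64_t b) {
      uint64_t sum;
      bool overflow = __builtin_add_overflow(a, b, &sum);
      std::printf("%llu\n", static_cast<unsigned long long>(sum)); // extra user
      return overflow;
    }
]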
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
@@ -471,16 +486,19 @@ public:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ Register getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const override;
+
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
- unsigned
+ Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return SystemZ::R6D;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
- unsigned
+ Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return SystemZ::R7D;
}
@@ -543,6 +561,8 @@ public:
return true;
}
+ unsigned getStackProbeSize(MachineFunction &MF) const;
+
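[Annotation: getStackProbeSize is the knob the probing code reads. One plausible implementation, following the usual "stack-probe-size" function-attribute convention (a sketch, not necessarily the exact body in this commit):

    unsigned SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
      const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
      unsigned StackAlign = TFI->getStackAlignment();
      unsigned StackProbeSize = 4096; // default probe granularity
      const Function &Fn = MF.getFunction();
      if (Fn.hasFnAttribute("stack-probe-size"))
        Fn.getFnAttribute("stack-probe-size")
            .getValueAsString()
            .getAsInteger(0, StackProbeSize);
      // Keep the probe size compatible with the stack alignment.
      StackProbeSize &= ~(StackAlign - 1);
      return StackProbeSize ? StackProbeSize : StackAlign;
    }
]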
private:
const SystemZSubtarget &Subtarget;
@@ -607,8 +627,8 @@ private:
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const;
+ SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
bool canTreatAsByteVector(EVT VT) const;
@@ -629,11 +649,13 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineINTRINSIC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue unwrapAddress(SDValue N) const override;
@@ -676,8 +698,11 @@ private:
MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode) const;
+ MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
+ MachineBasicBlock *MBB) const;
- MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const Instruction &I) const override;
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
};
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index ec7639e71f81..9fc786f92635 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
@@ -36,7 +35,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
int64_t Offset = 0;
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
- MFFrame.getObjectSize(FI), MFFrame.getObjectAlignment(FI));
+ MFFrame.getObjectSize(FI), MFFrame.getObjectAlign(FI));
return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 6d03274fe8a6..337164d55e5f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -438,8 +438,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
- def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
+ defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+ defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>;
}
// Subtraction.
@@ -449,8 +449,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
- def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
- def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
+ defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>;
+ defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
}
// Multiplication.
@@ -460,8 +460,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
- def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
- def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
+ defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+ defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
}
// f64 multiplication of two FP32 registers.
@@ -503,8 +503,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
- def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+ defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
@@ -512,8 +512,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
- def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+ defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
@@ -522,8 +522,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
- def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
- def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+ defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+ defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
}
// Divide to integer.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index f064d33ac2f3..50f1e09c6ee5 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2334,49 +2334,49 @@ class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,
class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
- mnemonic##"\t$R1, $R2", []> {
+ mnemonic#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
- mnemonic##"\t$R1, $R2", []> {
+ mnemonic#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []> {
+ mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []> {
+ mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
- mnemonic##"\t$R1, $R3, $RI2", []> {
+ mnemonic#"\t$R1, $R3, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2384,7 +2384,7 @@ class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRIEe<opcode, (outs cls:$R1),
(ins cls:$R1src, cls:$R3, brtarget16:$RI2),
- mnemonic##"\t$R1, $R3, $RI2", []> {
+ mnemonic#"\t$R1, $R3, $RI2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2392,7 +2392,7 @@ class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRSa<opcode, (outs cls:$R1),
(ins cls:$R1src, cls:$R3, bdaddr12only:$BD2),
- mnemonic##"\t$R1, $R3, $BD2", []> {
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2400,7 +2400,7 @@ class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSYa<opcode,
(outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2),
- mnemonic##"\t$R1, $R3, $BD2", []> {
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -2421,7 +2421,7 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : LoadMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -2487,7 +2487,7 @@ class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
let DispSize = "20" in
@@ -2567,7 +2567,7 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreMultipleRS<mnemonic, rsOpcode, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
}
// Like CondUnaryRSY, but used for the raw assembly form. The condition-code
@@ -2884,7 +2888,7 @@ class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bytes, bdxaddr12pair>;
let DispSize = "20" in
@@ -2907,13 +2911,15 @@ class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>
class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
- bits<4> m5 = 0>
+ bits<4> m5 = 0, string fp_mnemonic = "">
: InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
mnemonic#"\t$V1, $V2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]> {
let M3 = type;
let M4 = m4;
let M5 = m5;
+ let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0,
@@ -2948,7 +2954,7 @@ multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
def : InstAlias<mnemonic#"\t$V1, $V2",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>;
let Defs = [CC] in
- def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2,
+ def S : UnaryVRRa<mnemonic#"s", opcode, operator_cc, tr1, tr2,
type, 0, 1>;
}
@@ -2992,17 +2998,17 @@ multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {
class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []>;
+ mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRXY<string mnemonic, bits<16> opcode,
RegisterOperand cls>
: InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2),
- mnemonic##"\t$R1, $XBD2", []>;
+ mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
RegisterOperand cls>
: InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
- mnemonic##"\t$R1, $RI2", []> {
+ mnemonic#"\t$R1, $RI2", []> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@@ -3045,16 +3051,16 @@ class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
class SideEffectBinarySSa<string mnemonic, bits<8> opcode>
: InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2),
- mnemonic##"\t$BDL1, $BD2", []>;
+ mnemonic#"\t$BDL1, $BD2", []>;
class SideEffectBinarySSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
(outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
- mnemonic##"\t$BDL1, $BDL2", []>;
+ mnemonic#"\t$BDL1, $BDL2", []>;
class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
: InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
- mnemonic##"\t$BD1, $BDL2", []>;
+ mnemonic#"\t$BD1, $BDL2", []>;
class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
: InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
@@ -3211,6 +3217,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3252,6 +3260,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -3299,7 +3309,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
ImmOpWithPattern imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
+ def K : BinaryRIE<mnemonic#"k", opcode2, operator, cls, imm>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2" in
def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
@@ -3376,7 +3386,7 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
+ def K : BinaryRSY<mnemonic#"k", opcode2, operator, cls>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2" in
def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
@@ -3448,7 +3458,7 @@ class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
bdxaddr12pair>;
@@ -3479,7 +3489,7 @@ class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
bits<16> siyOpcode, SDPatternOperator operator,
Operand imm> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
let DispSize = "20" in
@@ -3575,7 +3585,7 @@ multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
!and (modifier, 14)>;
let Defs = [CC] in
- def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
!add (!and (modifier, 14), 1)>;
}
@@ -3604,7 +3614,7 @@ multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
- def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>;
+ def S : BinaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type, 1>;
}
multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
@@ -3619,7 +3629,7 @@ multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
- bits<4> m6 = 0>
+ bits<4> m6 = 0, string fp_mnemonic = "">
: InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
mnemonic#"\t$V1, $V2, $V3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
@@ -3627,6 +3637,8 @@ class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
let M5 = m5;
let M6 = m6;
+ let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0,
@@ -3655,7 +3667,7 @@ multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type,
m5, !and (modifier, 14)>;
let Defs = [CC] in
- def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : BinaryVRRc<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
m5, !add (!and (modifier, 14), 1)>;
}
@@ -3752,7 +3764,7 @@ class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
bits<16> rsyOpcode, RegisterOperand cls,
bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : StoreBinaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -3892,7 +3904,7 @@ class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
load, bytes, bdxaddr12pair>;
@@ -3920,7 +3932,7 @@ class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
RegisterOperand cls, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CompareRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -3931,7 +3943,7 @@ multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class CompareSSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
(outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
- mnemonic##"\t$BDL1, $BDL2", []> {
+ mnemonic#"\t$BDL1, $BDL2", []> {
let isCompare = 1;
let mayLoad = 1;
}
@@ -3978,7 +3990,7 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr, bits<4> type>
+ TypedReg tr, bits<4> type, string fp_mnemonic = "">
: InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
mnemonic#"\t$V1, $V2",
[(set CC, (operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2)))]> {
@@ -3986,6 +3998,8 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = type;
let M4 = 0;
let M5 = 0;
+ let OpKey = fp_mnemonic#!subst("VR", "FP", !cast<string>(tr.op));
+ let OpType = "reg";
}
class CompareVRRaGeneric<string mnemonic, bits<16> opcode>
@@ -4043,7 +4057,7 @@ class TestVRRg<string mnemonic, bits<16> opcode>
class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
: InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,
shift12only:$BD2, imm32zx4:$I3),
- mnemonic##"\t$BDL1, $BD2, $I3", []>;
+ mnemonic#"\t$BDL1, $BD2, $I3", []>;
class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
@@ -4179,7 +4193,7 @@ class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
RegisterOperand cls, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : TernaryRS<mnemonic, rsOpcode, cls, bytes, bdaddr12pair>;
let DispSize = "20" in
@@ -4303,7 +4317,7 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
- def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ def S : TernaryVRRb<mnemonic#"s", opcode, operator_cc, tr1, tr2, type,
imm32zx4even_timm, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
@@ -4371,7 +4385,7 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
}
// Ternary operation where the assembler mnemonic has an extra operand to
-// optionally allow specifiying arbitrary M6 values.
+// optionally allow specifying arbitrary M6 values.
multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type> {
@@ -4399,7 +4413,8 @@ multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
}
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0,
+ string fp_mnemonic = "">
: InstVRRe<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",
@@ -4408,6 +4423,8 @@ class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(tr1.vt tr1.op:$V4)))]> {
let M5 = m5;
let M6 = type;
+ let OpKey = fp_mnemonic#"MemFold"#!subst("VR", "FP", !cast<string>(tr1.op));
+ let OpType = "reg";
}
class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode>
@@ -4536,7 +4553,7 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
- def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
+ def S : QuaternaryVRRd<mnemonic#"s", opcode, operator_cc,
tr1, tr2, tr2, tr2, type,
imm32zx4even_timm, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
@@ -4630,7 +4647,7 @@ class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
SDPatternOperator operator, RegisterOperand cls> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
let DispSize = "12" in
def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
let DispSize = "20" in
@@ -4650,13 +4667,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
- mnemonic##"\t$M1, $XBD2",
+ mnemonic#"\t$M1, $XBD2",
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
: InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2),
- mnemonic##"\t$M1, $RI2",
+ mnemonic#"\t$M1, $RI2",
[(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -4765,7 +4782,9 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
: Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
- let OpKey = mnemonic#"rk"#cls;
+ let OpKey = !subst("mscrk", "msrkc",
+ !subst("msgcrk", "msgrkc",
+ mnemonic#"rk"#cls));
let OpType = "mem";
let MemKey = mnemonic#cls;
let MemType = "pseudo";
@@ -4775,6 +4794,40 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
let hasNoSchedulingInfo = 1;
}
+// Same as MemFoldPseudo but for mapping a W... vector instruction into the
+// corresponding FP instruction.
+class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : MemFoldPseudo<mnemonic, cls, bytes, mode> {
+ let OpKey = mnemonic#"r"#"MemFold"#cls;
+}
+
+class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1), (ins cls:$R2, cls:$R3, mode:$XBD2), []> {
+ let OpKey = mnemonic#"r"#"MemFold"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let HasIndex = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
+// Same as MemFoldPseudo but for Load On Condition with CC operands.
+class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
+ let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let hasNoSchedulingInfo = 1;
+}
+
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
ImmOpWithPattern imm>
@@ -4813,6 +4866,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = !subst("loc", "sel", mnemonic);
let NumOpsValue = "2";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRRFa, but expanded after RA depending on the choice of
@@ -4826,6 +4881,8 @@ class CondBinaryRRFaPseudo<string mnemonic, RegisterOperand cls1,
let CCMaskLast = 1;
let NumOpsKey = mnemonic;
let NumOpsValue = "3";
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
// Like CondBinaryRIE, but expanded after RA depending on the choice of
@@ -4842,8 +4899,9 @@ class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
// Like CondUnaryRSY, but expanded after RA depending on the choice of
// register.
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
- bits<5> bytes, AddressingMode mode = bdaddr20only>
+class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
[(set cls:$R1,
@@ -4854,6 +4912,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
+ let OpKey = mnemonic#"r"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
}
// Like CondStoreRSY, but expanded after RA depending on the choice of
@@ -5039,7 +5101,6 @@ multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only> {
-
def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
let MemKey = mnemonic#cls;
let MemType = "target";
@@ -5052,7 +5113,7 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
bits<16> rxyOpcode, SDPatternOperator operator,
RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
- let DispKey = mnemonic ## #cls in {
+ let DispKey = mnemonic # cls in {
def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
bdxaddr12pair> {
let DispSize = "12";
@@ -5066,6 +5127,43 @@ multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
}
+multiclass BinaryRXEAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ def "" : BinaryRXE<mnemonic, opcode, operator, cls, load, bytes> {
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ def _MemFoldPseudo : MemFoldPseudo_FP<mnemonic, cls, bytes, bdxaddr12pair>;
+}
+
+multiclass TernaryRXFAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls1,
+ RegisterOperand cls2, SDPatternOperator load,
+ bits<5> bytes> {
+ def "" : TernaryRXF<mnemonic, opcode, operator, cls1, cls2, load, bytes> {
+ let MemKey = mnemonic#cls1;
+ let MemType = "target";
+ }
+ def _MemFoldPseudo : MemFoldPseudo_FPTern<mnemonic, cls1, bytes, bdxaddr12pair>;
+}
+
+multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
+ def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
+multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
+ def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
// The Sequence form uses a straight-line sequence of instructions and
@@ -5086,7 +5184,7 @@ multiclass MemorySS<string mnemonic, bits<8> opcode,
}
}
-// The same, but setting a CC result as comparion operator.
+// The same, but setting a CC result as comparison operator.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 97c8fa7aa32e..223cfcba2fac 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -513,8 +513,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
-bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask,
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask,
int &Value) const {
assert(MI.isCompare() && "Caller should have checked for a comparison");
@@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ Register DstReg, Register TrueReg,
+ Register FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Not all subtargets have LOCR instructions.
if (!STI.hasLoadStoreOnCond())
@@ -565,10 +566,10 @@ bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL, unsigned DstReg,
+ const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Pred,
- unsigned TrueReg,
- unsigned FalseReg) const {
+ Register TrueReg,
+ Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
@@ -606,7 +607,7 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
- unsigned Reg,
+ Register Reg,
MachineRegisterInfo *MRI) const {
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
@@ -819,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Move CC value from/to a GR32.
- if (SrcReg == SystemZ::CC) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg);
- if (KillSrc) {
- const MachineFunction *MF = MBB.getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- MIB->addRegisterKilled(SrcReg, TRI);
- }
- return;
- }
+ // Move CC value from a GR32.
if (DestReg == SystemZ::CC) {
- BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH))
+ unsigned Opcode =
+ SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH;
+ BuildMI(MBB, MBBI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(3 << (SystemZ::IPM_CC - 16));
return;
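[Annotation: the rewritten GR-to-CC copy (now also handling high-half registers via TMHH) relies on test-under-mask: for a value produced earlier by IPM, a TM of the two CC bits sets CC to 0/1/2/3 exactly for the bit patterns 00/01/10/11, recreating the saved value. A quick self-check of the mask arithmetic (assumes SystemZ::IPM_CC == 28):

    #include <cassert>

    int main() {
      const unsigned IPM_CC = 28;                // CC sits in bits 28-29
      const unsigned Mask = 3u << (IPM_CC - 16); // 0x3000: TMLH halfword mask
      for (unsigned CC = 0; CC < 4; ++CC) {
        unsigned GR32 = CC << IPM_CC;            // value a prior IPM produced
        unsigned HighHalf = GR32 >> 16;          // halfword that TMLH inspects
        assert(((HighHalf & Mask) >> (IPM_CC - 16)) == CC); // CC recovered
      }
      return 0;
    }
]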
@@ -855,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::VLR;
else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::CPYA;
- else if (SystemZ::AR32BitRegClass.contains(DestReg) &&
- SystemZ::GR32BitRegClass.contains(SrcReg))
- Opcode = SystemZ::SAR;
- else if (SystemZ::GR32BitRegClass.contains(DestReg) &&
- SystemZ::AR32BitRegClass.contains(SrcReg))
- Opcode = SystemZ::EAR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@@ -869,7 +857,7 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void SystemZInstrInfo::storeRegToStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIdx, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -884,7 +872,7 @@ void SystemZInstrInfo::storeRegToStackSlot(
}
void SystemZInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
int FrameIdx, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -1005,33 +993,36 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS, VirtRegMap *VRM) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
unsigned Opcode = MI.getOpcode();
+  // Check CC liveness if the new instruction introduces a dead def of CC.
+ MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
+ SlotIndex MISlot = SlotIndex();
+ LiveRange *CCLiveRange = nullptr;
+ bool CCLiveAtMI = true;
+ if (LIS) {
+ MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ CCLiveRange = &LIS->getRegUnit(*CCUnit);
+ CCLiveAtMI = CCLiveRange->liveAt(MISlot);
+ }
+ ++CCUnit;
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
+
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
- if (LIS != nullptr && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
isInt<8>(MI.getOperand(2).getImm()) && !MI.getOperand(3).getReg()) {
-
- // Check CC liveness, since new instruction introduces a dead
- // def of CC.
- MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
- LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
- ++CCUnit;
- assert(!CCUnit.isValid() && "CC only has one reg unit.");
- SlotIndex MISlot =
- LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
- if (!CCLiveRange.liveAt(MISlot)) {
- // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
- MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
- MI.getDebugLoc(), get(SystemZ::AGSI))
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addImm(MI.getOperand(2).getImm());
- BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
- CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator());
- return BuiltMI;
- }
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ MachineInstr *BuiltMI = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(2).getImm());
+ BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true);
+ CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ return BuiltMI;
}
return nullptr;
}
@@ -1090,6 +1081,32 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
return BuiltMI;
}
+ unsigned MemImmOpc = 0;
+ switch (Opcode) {
+ case SystemZ::LHIMux:
+ case SystemZ::LHI: MemImmOpc = SystemZ::MVHI; break;
+ case SystemZ::LGHI: MemImmOpc = SystemZ::MVGHI; break;
+ case SystemZ::CHIMux:
+ case SystemZ::CHI: MemImmOpc = SystemZ::CHSI; break;
+ case SystemZ::CGHI: MemImmOpc = SystemZ::CGHSI; break;
+ case SystemZ::CLFIMux:
+ case SystemZ::CLFI:
+ if (isUInt<16>(MI.getOperand(1).getImm()))
+ MemImmOpc = SystemZ::CLFHSI;
+ break;
+ case SystemZ::CLGFI:
+ if (isUInt<16>(MI.getOperand(1).getImm()))
+ MemImmOpc = SystemZ::CLGHSI;
+ break;
+ default: break;
+ }
+ if (MemImmOpc)
+ return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
+ get(MemImmOpc))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm(MI.getOperand(1).getImm());
+
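[Annotation: the new MemImmOpc table lets a spill fold an entire register-immediate instruction into a storage-immediate one, e.g. LHI %r0, 42 plus a store to the slot becomes a single MVHI to the slot. A condensed stand-alone model (the enum is a hypothetical stand-in, not LLVM's generated opcode values):

    #include <cstdint>
    #include <optional>

    enum Opc { LHI, LGHI, CHI, CGHI, CLFI, CLGFI,
               MVHI, MVGHI, CHSI, CGHSI, CLFHSI, CLGHSI };

    std::optional<Opc> memImmOpcode(Opc Op, int64_t Imm) {
      switch (Op) {
      case LHI:  return MVHI;
      case LGHI: return MVGHI;
      case CHI:  return CHSI;
      case CGHI: return CGHSI;
      case CLFI:   // The storage forms of the unsigned compares only take a
      case CLGFI:  // 16-bit unsigned immediate, hence the isUInt<16> guard.
        if (Imm >= 0 && Imm <= UINT16_MAX)
          return Op == CLFI ? CLFHSI : CLGHSI;
        return std::nullopt;
      default:
        return std::nullopt;
      }
    }
]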
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
bool Op0IsGPR = (Opcode == SystemZ::LGDR);
bool Op1IsGPR = (Opcode == SystemZ::LDGR);
@@ -1159,57 +1176,144 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
}
// If the spilled operand is the final one or the instruction is
- // commutable, try to change <INSN>R into <INSN>.
+ // commutable, try to change <INSN>R into <INSN>. Don't introduce a def of
+ // CC if it is live and MI does not define it.
unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode);
+ if (MemOpcode == -1 ||
+ (CCLiveAtMI && !MI.definesRegister(SystemZ::CC) &&
+ get(MemOpcode).hasImplicitDefOfPhysReg(SystemZ::CC)))
+ return nullptr;
+
+ // Check if all other vregs have a usable allocation in the case of vector
+ // to FP conversion.
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I != E; ++I) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[I];
+ if (MCOI.OperandType != MCOI::OPERAND_REGISTER || I == OpNum)
+ continue;
+ const TargetRegisterClass *RC = TRI->getRegClass(MCOI.RegClass);
+ if (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass) {
+ Register Reg = MI.getOperand(I).getReg();
+ Register PhysReg = Register::isVirtualRegister(Reg)
+ ? (VRM ? VRM->getPhys(Reg) : Register())
+ : Reg;
+ if (!PhysReg ||
+ !(SystemZ::FP32BitRegClass.contains(PhysReg) ||
+ SystemZ::FP64BitRegClass.contains(PhysReg) ||
+ SystemZ::VF128BitRegClass.contains(PhysReg)))
+ return nullptr;
+ }
+ }
+ // Fused multiply and add/sub need to have the same dst and accumulator reg.
+ bool FusedFPOp = (Opcode == SystemZ::WFMADB || Opcode == SystemZ::WFMASB ||
+ Opcode == SystemZ::WFMSDB || Opcode == SystemZ::WFMSSB);
+ if (FusedFPOp) {
+ Register DstReg = VRM->getPhys(MI.getOperand(0).getReg());
+ Register AccReg = VRM->getPhys(MI.getOperand(3).getReg());
+ if (OpNum == 0 || OpNum == 3 || DstReg != AccReg)
+ return nullptr;
+ }
+
+ // Try to swap compare operands if possible.
+ bool NeedsCommute = false;
+ if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
+ MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR ||
+ MI.getOpcode() == SystemZ::WFCDB || MI.getOpcode() == SystemZ::WFCSB ||
+ MI.getOpcode() == SystemZ::WFKDB || MI.getOpcode() == SystemZ::WFKSB) &&
+ OpNum == 0 && prepareCompareSwapOperands(MI))
+ NeedsCommute = true;
+
+ bool CCOperands = false;
+ if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
+ MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
+ assert(MI.getNumOperands() == 6 && NumOps == 5 &&
+ "LOCR/SELR instruction operands corrupt?");
+ NumOps -= 2;
+ CCOperands = true;
+ }
// See if this is a 3-address instruction that is convertible to 2-address
// and suitable for folding below. Only try this with virtual registers
// and a provided VRM (during regalloc).
- bool NeedsCommute = false;
- if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+ if (NumOps == 3 && SystemZ::getTargetMemOpcode(MemOpcode) != -1) {
if (VRM == nullptr)
- MemOpcode = -1;
+ return nullptr;
else {
- assert(NumOps == 3 && "Expected two source registers.");
Register DstReg = MI.getOperand(0).getReg();
Register DstPhys =
(Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
: ((OpNum == 1 && MI.isCommutable())
? MI.getOperand(2).getReg()
- : Register()));
+ : Register()));
if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
Register::isVirtualRegister(SrcReg) &&
DstPhys == VRM->getPhys(SrcReg))
NeedsCommute = (OpNum == 1);
else
- MemOpcode = -1;
+ return nullptr;
}
}
- if (MemOpcode >= 0) {
- if ((OpNum == NumOps - 1) || NeedsCommute) {
- const MCInstrDesc &MemDesc = get(MemOpcode);
- uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
- assert(AccessBytes != 0 && "Size of access should be known");
- assert(AccessBytes <= Size && "Access outside the frame index");
- uint64_t Offset = Size - AccessBytes;
- MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
- MI.getDebugLoc(), get(MemOpcode));
+ if ((OpNum == NumOps - 1) || NeedsCommute || FusedFPOp) {
+ const MCInstrDesc &MemDesc = get(MemOpcode);
+ uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+ assert(AccessBytes != 0 && "Size of access should be known");
+ assert(AccessBytes <= Size && "Access outside the frame index");
+ uint64_t Offset = Size - AccessBytes;
+ MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+ MI.getDebugLoc(), get(MemOpcode));
+ if (MI.isCompare()) {
+ assert(NumOps == 2 && "Expected 2 register operands for a compare.");
+ MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
+ }
+ else if (FusedFPOp) {
+ MIB.add(MI.getOperand(0));
+ MIB.add(MI.getOperand(3));
+ MIB.add(MI.getOperand(OpNum == 1 ? 2 : 1));
+ }
+ else {
MIB.add(MI.getOperand(0));
if (NeedsCommute)
MIB.add(MI.getOperand(2));
else
for (unsigned I = 1; I < OpNum; ++I)
MIB.add(MI.getOperand(I));
- MIB.addFrameIndex(FrameIndex).addImm(Offset);
- if (MemDesc.TSFlags & SystemZII::HasIndex)
- MIB.addReg(0);
- transferDeadCC(&MI, MIB);
- transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
- return MIB;
}
+ MIB.addFrameIndex(FrameIndex).addImm(Offset);
+ if (MemDesc.TSFlags & SystemZII::HasIndex)
+ MIB.addReg(0);
+ if (CCOperands) {
+ unsigned CCValid = MI.getOperand(NumOps).getImm();
+ unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
+ MIB.addImm(CCValid);
+ MIB.addImm(NeedsCommute ? CCMask ^ CCValid : CCMask);
+ }
+ if (MIB->definesRegister(SystemZ::CC) &&
+ (!MI.definesRegister(SystemZ::CC) ||
+ MI.registerDefIsDead(SystemZ::CC))) {
+ MIB->addRegisterDead(SystemZ::CC, TRI);
+ if (CCLiveRange)
+ CCLiveRange->createDeadDef(MISlot, LIS->getVNInfoAllocator());
+ }
+ // Constrain the register classes if converted from a vector opcode. The
+ // allocated regs are in an FP reg-class per previous check above.
+ for (const MachineOperand &MO : MIB->operands())
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass);
+ else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::FP64BitRegClass);
+ else if (MRI.getRegClass(Reg) == &SystemZ::VR128BitRegClass)
+ MRI.setRegClass(Reg, &SystemZ::VF128BitRegClass);
+ }
+
+ transferDeadCC(&MI, MIB);
+ transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
+ transferMIFlag(&MI, MIB, MachineInstr::NoFPExcept);
+ return MIB;
}
return nullptr;
@@ -1718,6 +1822,80 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return 0;
}
+bool SystemZInstrInfo::
+prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
+ assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
+ MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
+ "Not a compare reg/reg.");
+
+ MachineBasicBlock *MBB = MBBI->getParent();
+ bool CCLive = true;
+ SmallVector<MachineInstr *, 4> CCUsers;
+ for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+ Itr != MBB->end(); ++Itr) {
+ if (Itr->readsRegister(SystemZ::CC)) {
+ unsigned Flags = Itr->getDesc().TSFlags;
+ if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
+ CCUsers.push_back(&*Itr);
+ else
+ return false;
+ }
+ if (Itr->definesRegister(SystemZ::CC)) {
+ CCLive = false;
+ break;
+ }
+ }
+ if (CCLive) {
+ LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
+ LiveRegs.addLiveOuts(*MBB);
+ if (LiveRegs.contains(SystemZ::CC))
+ return false;
+ }
+
+ // Update all CC users.
+ for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
+ unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
+ unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+ 0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
+ MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
+ unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
+ CCMaskMO.setImm(NewCCMask);
+ }
+
+ return true;
+}
+
+unsigned SystemZ::reverseCCMask(unsigned CCMask) {
+ return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
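[Annotation: reverseCCMask is what keeps prepareCompareSwapOperands honest: once the compare's operands are commuted, every consumer's CC mask must trade its LT and GT bits while EQ and UO stay put. A self-checking sketch (the mask values are assumed stand-ins for the SystemZ::CCMASK_CMP_* constants):

    #include <cassert>

    enum { EQ = 8, LT = 4, GT = 2, UO = 1 };

    unsigned reverseCCMask(unsigned CCMask) {
      return (CCMask & EQ) | (CCMask & GT ? LT : 0) |
             (CCMask & LT ? GT : 0) | (CCMask & UO);
    }

    int main() {
      assert(reverseCCMask(LT) == GT);             // a < b   <=>  b > a
      assert(reverseCCMask(EQ | LT) == (EQ | GT)); // a <= b  <=>  b >= a
      assert(reverseCCMask(reverseCCMask(LT | UO)) == (LT | UO)); // involution
      return 0;
    }
]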
+MachineBasicBlock *SystemZ::emitBlockAfter(MachineBasicBlock *MBB) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+MachineBasicBlock *SystemZ::splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
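[Annotation: the two split helpers, now hoisted into the SystemZ namespace for reuse, differ only in where the cut falls: splitBlockAfter keeps MI in the old block, splitBlockBefore moves MI into the new one. A toy model of the splice semantics (a sketch over std::vector, not MachineBasicBlock code):

    #include <cassert>
    #include <vector>

    using Block = std::vector<int>;

    Block splitAfter(Block &B, size_t i) {  // element i stays in B
      Block New(B.begin() + i + 1, B.end());
      B.resize(i + 1);
      return New;
    }

    Block splitBefore(Block &B, size_t i) { // element i moves to the new block
      Block New(B.begin() + i, B.end());
      B.resize(i);
      return New;
    }

    int main() {
      Block A{1, 2, 3, 4};
      Block TailA = splitAfter(A, 1);  // A = {1,2}, TailA = {3,4}
      assert(A.size() == 2 && TailA.front() == 3);
      Block B{1, 2, 3, 4};
      Block TailB = splitBefore(B, 1); // B = {1},  TailB = {2,3,4}
      assert(B.size() == 1 && TailB.front() == 2);
      return 0;
    }
]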
unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
if (!STI.hasLoadAndTrap())
return 0;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 8391970c7d9d..72dafc3c93c2 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,6 +155,20 @@ enum FusedCompareType {
namespace SystemZ {
int getTwoOperandOpcode(uint16_t Opcode);
int getTargetMemOpcode(uint16_t Opcode);
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+unsigned reverseCCMask(unsigned CCMask);
+
+// Create a new basic block after MBB.
+MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB);
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
+// Split MBB before MI and return the new block (the one that contains MI).
+MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB);
}
class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -219,15 +233,16 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
- bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &Mask, int &Value) const override;
- bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int&, int&, int&) const override;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int &Mask, int &Value) const override;
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ Register, Register, Register, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const DebugLoc &DL, unsigned DstReg,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg) const override;
- bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const override;
bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
@@ -247,12 +262,12 @@ public:
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
+ Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
@@ -313,6 +328,12 @@ public:
SystemZII::FusedCompareType Type,
const MachineInstr *MI = nullptr) const;
+ // Try to find all CC users of the compare instruction (MBBI) and update
+ // all of them to maintain equivalent behavior after swapping the compare
+ // operands. Return false if not all users can be conclusively found and
+ // handled. The compare instruction is *not* changed.
+ bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
  // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
  // operation exists, return the opcode for the latter, otherwise return 0.
unsigned getLoadAndTrap(unsigned Opcode) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 9579dcc0d1b6..d5d56ecf6e47 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -29,6 +29,15 @@ let hasNoSchedulingInfo = 1, hasSideEffects = 1 in {
def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
[(set GR64:$dst, dynalloc12only:$src)]>;
+let Defs = [R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ usesCustomInserter = 1 in
+ def PROBED_ALLOCA : Pseudo<(outs GR64:$dst),
+ (ins GR64:$oldSP, GR64:$space),
+ [(set GR64:$dst, (z_probed_alloca GR64:$oldSP, GR64:$space))]>;
+
+let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
+ hasSideEffects = 1 in
+ def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;

//===----------------------------------------------------------------------===//
// Branch instructions
@@ -492,7 +501,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to SELR or SELFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
+ def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +534,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
let isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
- def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
+ def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
}
// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
- def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+ defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
// Store on condition. Expanded from CondStore* pseudos.
@@ -564,7 +573,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
// Load on condition. Matched via DAG pattern.
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
- defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
+ defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;
// Store on condition. Expanded from CondStore* pseudos.
defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
@@ -1348,8 +1357,8 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of memory, setting the condition code.
let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in {
- def MSC : BinaryRXY<"msc", 0xE353, null_frag, GR32, load, 4>;
- def MSGC : BinaryRXY<"msgc", 0xE383, null_frag, GR64, load, 8>;
+ defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, load, 4>;
+ defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>;
}
// Multiplication of a register, producing two results.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c945122ee577..e73f1e429c3c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -177,9 +177,13 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVectorPackedDecimal] in {
// Load rightmost with length. The number of loaded bytes is only known
- // at run time.
- def VLRL : BinaryVSI<"vlrl", 0xE635, int_s390_vlrl, 0>;
+ // at run time. Note that while the instruction will accept immediate
+  // lengths larger than 15 at runtime, those will always result in a trap,
+ // so we never emit them here.
+ def VLRL : BinaryVSI<"vlrl", 0xE635, null_frag, 0>;
def VLRLR : BinaryVRSd<"vlrlr", 0xE637, int_s390_vlrl, 0>;
+ def : Pat<(int_s390_vlrl imm32zx4:$len, bdaddr12only:$addr),
+ (VLRL bdaddr12only:$addr, imm32zx4:$len)>;
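+  // (Editorial note: imm32zx4 only matches constant lengths in the range
+  // 0-15, so this pattern keeps ISel from emitting a trapping VLRL; other
+  // length values are selected through the register form VLRLR above.)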
}
// Use replicating loads if we're inserting a single element into an
@@ -243,9 +247,13 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVectorPackedDecimal] in {
// Store rightmost with length. The number of stored bytes is only known
- // at run time.
- def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, int_s390_vstrl, 0>;
+ // at run time. Note that while the instruction will accept immediate
+  // lengths larger than 15 at runtime, those will always result in a trap,
+ // so we never emit them here.
+ def VSTRL : StoreLengthVSI<"vstrl", 0xE63D, null_frag, 0>;
def VSTRLR : StoreLengthVRSd<"vstrlr", 0xE63F, int_s390_vstrl, 0>;
+ def : Pat<(int_s390_vstrl VR128:$val, imm32zx4:$len, bdaddr12only:$addr),
+ (VSTRL VR128:$val, bdaddr12only:$addr, imm32zx4:$len)>;
}
//===----------------------------------------------------------------------===//
@@ -463,49 +471,56 @@ defm : GenericVectorOps<v2f64, v2i64>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- // Add.
- def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
- def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
- def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
- def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
- def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
- def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
-
- // Add compute carry.
- def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
- def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
- def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
- def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
- def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
- def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
-
- // Add with carry.
- def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
- def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
-
- // Add with carry compute carry.
- def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
- def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+ let isCommutable = 1 in {
+ // Add.
+ def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+ }
+
+ let isCommutable = 1 in {
+ // Add compute carry.
+ def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+
+ // Add with carry.
+ def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+
+ // Add with carry compute carry.
+ def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+ }
// And.
- def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
// And with complement.
def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
- // Average.
- def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
- def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
- def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
- def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
- def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
-
- // Average logical.
- def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
- def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
- def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
- def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
- def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+ let isCommutable = 1 in {
+ // Average.
+ def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+
+ // Average logical.
+ def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+ }
// Checksum.
def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
@@ -524,12 +539,14 @@ let Predicates = [FeatureVector] in {
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
- // Not exclusive or.
- let Predicates = [FeatureVectorEnhancements1] in
- def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
+ let isCommutable = 1 in {
+ // Not exclusive or.
+ let Predicates = [FeatureVectorEnhancements1] in
+ def VNX : BinaryVRRc<"vnx", 0xE76C, null_frag, v128any, v128any>;
- // Exclusive or.
- def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+ // Exclusive or.
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+ }
// Galois field multiply sum.
def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>;
@@ -559,135 +576,145 @@ let Predicates = [FeatureVector] in {
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
- // Maximum.
- def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
- def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
- def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
- def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
- def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
-
- // Maximum logical.
- def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
- def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
- def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
- def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
- def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+ let isCommutable = 1 in {
+ // Maximum.
+ def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+
+ // Maximum logical.
+ def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+ }
- // Minimum.
- def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
- def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
- def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
- def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
- def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
-
- // Minimum logical.
- def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
- def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
- def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
- def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
- def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
-
- // Multiply and add low.
- def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
- def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
- def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
- def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
-
- // Multiply and add high.
- def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
- def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
- def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
- def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
-
- // Multiply and add logical high.
- def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
- def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
- def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
- def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
-
- // Multiply and add even.
- def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
- def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
- def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
- def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
-
- // Multiply and add logical even.
- def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
- def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
- def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
- def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
-
- // Multiply and add odd.
- def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
- def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
- def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
- def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
-
- // Multiply and add logical odd.
- def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
- def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
- def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
- def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
-
- // Multiply high.
- def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
- def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
- def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
- def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
-
- // Multiply logical high.
- def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
- def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
- def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
- def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
-
- // Multiply low.
- def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
- def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
- def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
- def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
-
- // Multiply even.
- def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
- def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
- def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
- def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
-
- // Multiply logical even.
- def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
- def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
- def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
- def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
-
- // Multiply odd.
- def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
- def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
- def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
- def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
-
- // Multiply logical odd.
- def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
- def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
- def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
- def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ let isCommutable = 1 in {
+ // Minimum.
+ def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+
+ // Minimum logical.
+ def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+ }
+
+ let isCommutable = 1 in {
+ // Multiply and add low.
+ def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+
+ // Multiply and add high.
+ def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+
+ // Multiply and add logical high.
+ def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+
+ // Multiply and add even.
+ def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+
+ // Multiply and add logical even.
+ def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+
+ // Multiply and add odd.
+ def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+
+ // Multiply and add logical odd.
+ def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+ }
+
+ let isCommutable = 1 in {
+ // Multiply high.
+ def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+
+ // Multiply logical high.
+ def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+
+ // Multiply low.
+ def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+
+ // Multiply even.
+ def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+
+ // Multiply logical even.
+ def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+
+ // Multiply odd.
+ def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+
+ // Multiply logical odd.
+ def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ }
// Multiply sum logical.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in {
def VMSL : QuaternaryVRRdGeneric<"vmsl", 0xE7B8>;
def VMSLG : QuaternaryVRRd<"vmslg", 0xE7B8, int_s390_vmslg,
v128q, v128g, v128g, v128q, 3>;
}
// Nand.
- let Predicates = [FeatureVectorEnhancements1] in
+ let Predicates = [FeatureVectorEnhancements1], isCommutable = 1 in
def VNN : BinaryVRRc<"vnn", 0xE76E, null_frag, v128any, v128any>;
// Nor.
- def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
// Or.
- def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+ let isCommutable = 1 in
+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
// Or with complement.
let Predicates = [FeatureVectorEnhancements1] in
@@ -1017,13 +1044,15 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
let Predicates = [FeatureVector] in {
// Add.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8, 0,
+ "adbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
- def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+ def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8, 0,
+ "aebr">;
def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
}
}
@@ -1104,10 +1133,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8, 0,
+ "ddbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
- def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+ def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8, 0,
+ "debr">;
def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
}
}
@@ -1135,7 +1166,8 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8, 0,
+ "ldebr">;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1178,7 +1210,7 @@ let Predicates = [FeatureVector] in {
def : FPMinMax<insn, any_fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
v128db, v128db, 3, 0>;
@@ -1204,7 +1236,7 @@ let Predicates = [FeatureVector] in {
def : FPMinMax<insn, any_fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
v128db, v128db, 3, 0>;
@@ -1225,43 +1257,49 @@ let Predicates = [FeatureVector] in {
}
// Multiply.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8, 0,
+ "mdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
- def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+ def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8, 0,
+ "meebr">;
def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
}
}
// Multiply and add.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3,
+ "madbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
- def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+ def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2,
+ "maebr">;
def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
}
}
// Multiply and subtract.
- let Uses = [FPC], mayRaiseFPException = 1 in {
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1 in {
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3,
+ "msdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
- def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+ def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2,
+ "msebr">;
def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
}
}
// Negative multiply and add.
- let Uses = [FPC], mayRaiseFPException = 1,
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
@@ -1272,7 +1310,7 @@ let Predicates = [FeatureVector] in {
}
// Negative multiply and subtract.
- let Uses = [FPC], mayRaiseFPException = 1,
+ let Uses = [FPC], mayRaiseFPException = 1, isCommutable = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
@@ -1323,10 +1361,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8, 0,
+ "sqdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
- def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8, 0,
+ "sqebr">;
def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
}
}
@@ -1335,10 +1375,12 @@ let Predicates = [FeatureVector] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8, 0,
+ "sdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
- def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+ def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8, 0,
+ "sebr">;
def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
}
}
@@ -1364,9 +1406,9 @@ let Predicates = [FeatureVector] in {
// Compare scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3, "cdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>;
+ def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2, "cebr">;
def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;
}
}
@@ -1374,9 +1416,9 @@ let Predicates = [FeatureVector] in {
// Compare and signal scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
- def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>;
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3, "kdbr">;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>;
+ def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2, "kebr">;
def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;
}
}
@@ -1545,7 +1587,7 @@ def : VectorReplicateScalar<v16i8, VREPB, 7>;
def : VectorReplicateScalar<v8i16, VREPH, 3>;
def : VectorReplicateScalar<v4i32, VREPF, 1>;
-// i64 replications are just a single isntruction.
+// i64 replications are just a single instruction.
def : Pat<(v2i64 (z_replicate GR64:$scalar)),
(VLVGP GR64:$scalar, GR64:$scalar)>;
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index d1f6511ceea3..f755d5cd3d5b 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -29,8 +29,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
SystemZ::GPRRegs SpillGPRRegs;
SystemZ::GPRRegs RestoreGPRRegs;
- unsigned VarArgsFirstGPR;
- unsigned VarArgsFirstFPR;
+ Register VarArgsFirstGPR;
+ Register VarArgsFirstFPR;
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
int FramePointerSaveIndex;
@@ -47,7 +47,7 @@ public:
// this function and the SP offset for the STMG. These are 0 if no GPRs
// need to be saved or restored.
SystemZ::GPRRegs getSpillGPRRegs() const { return SpillGPRRegs; }
- void setSpillGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ void setSpillGPRRegs(Register Low, Register High, unsigned Offs) {
SpillGPRRegs.LowGPR = Low;
SpillGPRRegs.HighGPR = High;
SpillGPRRegs.GPROffset = Offs;
@@ -57,7 +57,7 @@ public:
// this function and the SP offset for the LMG. These are 0 if no GPRs
// need to be saved or restored.
SystemZ::GPRRegs getRestoreGPRRegs() const { return RestoreGPRRegs; }
- void setRestoreGPRRegs(unsigned Low, unsigned High, unsigned Offs) {
+ void setRestoreGPRRegs(Register Low, Register High, unsigned Offs) {
RestoreGPRRegs.LowGPR = Low;
RestoreGPRRegs.HighGPR = High;
RestoreGPRRegs.GPROffset = Offs;
@@ -65,12 +65,12 @@ public:
// Get and set the number of fixed (as opposed to variable) arguments
// that are passed in GPRs to this function.
- unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
- void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+ Register getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+ void setVarArgsFirstGPR(Register GPR) { VarArgsFirstGPR = GPR; }
// Likewise FPRs.
- unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
- void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; }
+ Register getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
+ void setVarArgsFirstFPR(Register FPR) { VarArgsFirstFPR = FPR; }
// Get and set the frame index of the first stack vararg.
unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td
index bd40f6d7bf40..a883daad73e7 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -22,8 +22,8 @@ class ImmediateTLSAsmOperand<string name>
}
class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
- let PrintMethod = "print"##asmop##"Operand";
- let DecoderMethod = "decode"##asmop##"Operand";
+ let PrintMethod = "print"#asmop#"Operand";
+ let DecoderMethod = "decode"#asmop#"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -52,14 +52,14 @@ multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
// Constructs an asm operand for a PC-relative address. SIZE says how
// many bits there are.
-class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
+class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"#size> {
let PredicateMethod = "isImm";
- let ParserMethod = "parsePCRel"##size;
+ let ParserMethod = "parsePCRel"#size;
}
class PCRelTLSAsmOperand<string size>
- : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
+ : ImmediateTLSAsmOperand<"PCRelTLS"#size> {
let PredicateMethod = "isImmTLS";
- let ParserMethod = "parsePCRelTLS"##size;
+ let ParserMethod = "parsePCRelTLS"#size;
}
// Constructs an operand for a PC-relative address with address type VT.
@@ -92,9 +92,9 @@ class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
class AddressAsmOperand<string format, string bitsize, string dispsize,
string length = "">
: AsmOperandClass {
- let Name = format##bitsize##"Disp"##dispsize##length;
- let ParserMethod = "parse"##format##bitsize;
- let RenderMethod = "add"##format##"Operands";
+ let Name = format#bitsize#"Disp"#dispsize#length;
+ let ParserMethod = "parse"#format#bitsize;
+ let RenderMethod = "add"#format#"Operands";
}
// Constructs an instruction operand for an addressing mode. FORMAT,
@@ -103,15 +103,15 @@ class AddressAsmOperand<string format, string bitsize, string dispsize,
// (base register, displacement, etc.).
class AddressOperand<string bitsize, string dispsize, string length,
string format, dag operands>
- : Operand<!cast<ValueType>("i"##bitsize)> {
- let PrintMethod = "print"##format##"Operand";
- let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ : Operand<!cast<ValueType>("i"#bitsize)> {
+ let PrintMethod = "print"#format#"Operand";
+ let EncoderMethod = "get"#format#dispsize#length#"Encoding";
let DecoderMethod =
- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ "decode"#format#bitsize#"Disp"#dispsize#length#"Operand";
let OperandType = "OPERAND_MEMORY";
let MIOperandInfo = operands;
let ParserMatchClass =
- !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
+ !cast<AddressAsmOperand>(format#bitsize#"Disp"#dispsize#length);
}
// Constructs both a DAG pattern and instruction operand for an addressing mode.
@@ -126,45 +126,45 @@ class AddressOperand<string bitsize, string dispsize, string length,
class AddressingMode<string seltype, string bitsize, string dispsize,
string suffix, string length, int numops, string format,
dag operands>
- : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
- "select"##seltype##dispsize##suffix##length,
+ : ComplexPattern<!cast<ValueType>("i"#bitsize), numops,
+ "select"#seltype#dispsize#suffix#length,
[add, sub, or, frameindex, z_adjdynalloc]>,
AddressOperand<bitsize, dispsize, length, format, operands>;
// An addressing mode with a base and displacement but no index.
class BDMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 2, "BDAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize))>;
// An addressing mode with a base, displacement and index.
class BDXMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDXAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<RegisterOperand>("ADDR"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<RegisterOperand>("ADDR"#bitsize))>;
// A BDMode paired with an immediate length operand of LENSIZE bits.
class BDLMode<string type, string bitsize, string dispsize, string suffix,
string lensize>
- : AddressingMode<type, bitsize, dispsize, suffix, "Len"##lensize, 3,
+ : AddressingMode<type, bitsize, dispsize, suffix, "Len"#lensize, 3,
"BDLAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<Operand>("imm"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<Operand>("imm"#bitsize))>;
// A BDMode paired with a register length operand.
class BDRMode<string type, string bitsize, string dispsize, string suffix>
: AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
- !cast<RegisterOperand>("GR"##bitsize))>;
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
+ !cast<RegisterOperand>("GR"#bitsize))>;
// An addressing mode with a base, displacement and a vector index.
class BDVMode<string bitsize, string dispsize>
: AddressOperand<bitsize, dispsize, "", "BDVAddr",
- (ops !cast<RegisterOperand>("ADDR"##bitsize),
- !cast<Operand>("disp"##dispsize##"imm"##bitsize),
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm"#bitsize),
!cast<RegisterOperand>("VR128"))>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index a6a72903e573..81af5fd854db 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -40,6 +40,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZProbedAlloca : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisInt<1>,
@@ -269,6 +273,8 @@ def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK",
SDT_ZSelectCCMask>;
def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca,
+ [SDNPHasChain]>;
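+// (Editorial note: this node is matched by the PROBED_ALLOCA pseudo in
+// SystemZInstrInfo.td, whose custom inserter is expected to expand it into a
+// loop probing each newly allocated page before the result is produced.)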
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
@@ -374,7 +380,7 @@ def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
- : SDNode<"SystemZISD::"##name, profile,
+ : SDNode<"SystemZISD::"#name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">;
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 501a69488397..e3190eddb9f1 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -57,10 +57,10 @@ multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
// The inserted operand is loaded using LOAD from an address of mode MODE.
multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
SDPatternOperator load, AddressingMode mode> {
- def : Pat<(!cast<SDPatternOperator>("or_as_"##type)
+ def : Pat<(!cast<SDPatternOperator>("or_as_"#type)
cls:$src1, (load mode:$src2)),
(insn cls:$src1, mode:$src2)>;
- def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type)
+ def : Pat<(!cast<SDPatternOperator>("or_as_rev"#type)
(load mode:$src2), cls:$src1),
(insn cls:$src1, mode:$src2)>;
}
@@ -167,7 +167,7 @@ class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
: Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
(insn tr2.op:$vec, suppress, mode)>;
-// Use INSN to perform mininum/maximum operation OPERATOR on type TR.
+// Use INSN to perform minimum/maximum operation OPERATOR on type TR.
// FUNCTION is the type of minimum/maximum function to perform.
class FPMinMax<Instruction insn, SDPatternOperator operator, TypedReg tr,
bits<4> function>
diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td
index af33a0300552..57c2411b8dcf 100644
--- a/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -9,7 +9,7 @@
// Processor definitions.
//
// For compatibility with other compilers on the platform, each model can
-// be identifed either by the system name (e.g. z10) or the level of the
+// be identified either by the system name (e.g. z10) or the level of the
// architecture the model supports, as identified by the edition level
// of the z/Architecture Principles of Operation document (e.g. arch8).
//
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 0d5e7af92523..fe2aaca8429a 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -73,13 +73,10 @@ static void addHints(ArrayRef<MCPhysReg> Order,
Hints.push_back(Reg);
}
-bool
-SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
- SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
- const LiveRegMatrix *Matrix) const {
+bool SystemZRegisterInfo::getRegAllocationHints(
+ Register VirtReg, ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -134,11 +131,11 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
- SmallVector<unsigned, 8> Worklist;
- SmallSet<unsigned, 4> DoneRegs;
+ SmallVector<Register, 8> Worklist;
+ SmallSet<Register, 4> DoneRegs;
Worklist.push_back(VirtReg);
while (Worklist.size()) {
- unsigned Reg = Worklist.pop_back_val();
+ Register Reg = Worklist.pop_back_val();
if (!DoneRegs.insert(Reg).second)
continue;
@@ -267,14 +264,14 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Decompose the frame index into a base and offset.
int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
- unsigned BasePtr;
+ Register BasePtr;
int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
MI->getOperand(FIOperandNum + 1).getImm());
// Special handling of dbg_value instructions.
if (MI->isDebugValue()) {
MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false);
- MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ MI->getDebugOffset().ChangeToImmediate(Offset);
return;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 7044efef1ac6..9f2cca0c83f6 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -58,11 +58,9 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- bool getRegAllocationHints(unsigned VirtReg,
- ArrayRef<MCPhysReg> Order,
+ bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
- const MachineFunction &MF,
- const VirtRegMap *VRM,
+ const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
// Override TargetRegisterInfo.h.
@@ -72,9 +70,6 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
- return true;
- }
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 3567b0f3acf8..a85862e62749 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -35,15 +35,15 @@ multiclass SystemZRegClass<string name, list<ValueType> types, int size,
dag regList, bit allocatable = 1> {
def AsmOperand : AsmOperandClass {
let Name = name;
- let ParserMethod = "parse"##name;
+ let ParserMethod = "parse"#name;
let RenderMethod = "addRegOperands";
}
let isAllocatable = allocatable in
def Bit : RegisterClass<"SystemZ", types, size, regList> {
let Size = size;
}
- def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
- let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
+ def "" : RegisterOperand<!cast<RegisterClass>(name#"Bit")> {
+ let ParserMatchClass = !cast<AsmOperandClass>(name#"AsmOperand");
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 47c925dcf730..6b4f35e5ba2b 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -47,7 +47,7 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
if (IsVolatile)
return SDValue();
@@ -74,7 +74,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
- SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile,
+ SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const {
EVT PtrVT = Dst.getValueType();
@@ -97,20 +97,22 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
unsigned Size2 = Bytes - Size1;
SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1,
- Align, DstPtrInfo);
+ Alignment.value(), DstPtrInfo);
if (Size2 == 0)
return Chain1;
Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(Size1, DL, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
- SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
- std::min(Align, Size1), DstPtrInfo);
+ SDValue Chain2 = memsetStore(
+ DAG, DL, Chain, Dst, ByteVal, Size2,
+ std::min((unsigned)Alignment.value(), Size1), DstPtrInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2);
}
} else {
// Handle one and two bytes using STC.
if (Bytes <= 2) {
- SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ SDValue Chain1 =
+ DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
if (Bytes == 1)
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
@@ -131,7 +133,7 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Align);
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 7d63bae83cf3..a4a5b1fbdf90 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -25,14 +25,15 @@ public:
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool IsVolatile,
- bool AlwaysInline,
+ SDValue Size, Align Alignment,
+ bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Byte,
- SDValue Size, unsigned Align, bool IsVolatile,
+ SDValue Size, Align Alignment,
+ bool IsVolatile,
MachinePointerInfo DstPtrInfo) const override;
std::pair<SDValue, SDValue>
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index f6184cec795a..3d27b70d6ef9 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -46,6 +46,7 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+ bool shortenFusedFPOp(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -64,7 +65,7 @@ SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
// Tie operands if MI has become a two-address instruction.
static void tieOpsIfNeeded(MachineInstr &MI) {
- if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ if (MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
!MI.getOperand(0).isTied())
MI.tieOperands(0, 1);
}
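+
+// Editorial sketch: for a two-address opcode the MCInstrDesc ties source
+// operand 1 to def operand 0, so getOperandConstraint(1, MCOI::TIED_TO)
+// returns 0; it returns -1 when no such constraint exists, which is why the
+// result is compared against 0 rather than merely tested for truth.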
@@ -175,6 +176,32 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
return false;
}
+bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) {
+ MachineOperand &DstMO = MI.getOperand(0);
+ MachineOperand &LHSMO = MI.getOperand(1);
+ MachineOperand &RHSMO = MI.getOperand(2);
+ MachineOperand &AccMO = MI.getOperand(3);
+ if (SystemZMC::getFirstReg(DstMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(LHSMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(RHSMO.getReg()) < 16 &&
+ SystemZMC::getFirstReg(AccMO.getReg()) < 16 &&
+ DstMO.getReg() == AccMO.getReg()) {
+ MachineOperand Lhs(LHSMO);
+ MachineOperand Rhs(RHSMO);
+ MachineOperand Src(AccMO);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .add(Src)
+ .add(Lhs)
+ .add(Rhs);
+ return true;
+ }
+ return false;
+}
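+
+// Illustrative example (editorial): when every operand lives in FP0-FP15
+// (the only FPRs with short RR-style encodings) and the accumulator equals
+// the destination, a 6-byte vector-register fused op such as
+//   %f1 = WFMADB %f2, %f3, %f1
+// is rewritten into the 4-byte fused form
+//   %f1 = MADBR %f1, %f2, %f3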
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -235,6 +262,22 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn001(MI, SystemZ::MEEBR);
break;
+ case SystemZ::WFMADB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MADBR);
+ break;
+
+ case SystemZ::WFMASB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MAEBR);
+ break;
+
+ case SystemZ::WFMSDB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MSDBR);
+ break;
+
+ case SystemZ::WFMSSB:
+ Changed |= shortenFusedFPOp(MI, SystemZ::MSEBR);
+ break;
+
case SystemZ::WFLCDB:
Changed |= shortenOn01(MI, SystemZ::LCDFR);
break;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 5e8af81842c4..68e0b7ae66a4 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,7 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -28,11 +29,16 @@ void SystemZSubtarget::anchor() {}
SystemZSubtarget &
SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- std::string CPUName = CPU;
+ StringRef CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
+
+ // -msoft-float implies -mno-vx.
+ if (HasSoftFloat)
+ HasVector = false;
+
return *this;
}
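
Annotation: the implication above is one-directional — +soft-float clears the vector feature, never the reverse. A toy restatement (illustrative types and names):

#include <cstdio>

struct ToyFeatures {
  bool HasVector = true;
  bool HasSoftFloat = false;
};

// Mirrors the post-parse fixup above: -msoft-float implies -mno-vx, no
// matter what the feature string requested.
ToyFeatures resolve(ToyFeatures F) {
  if (F.HasSoftFloat)
    F.HasVector = false;
  return F;
}

int main() {
  ToyFeatures F{true, true}; // "+vector,+soft-float"
  std::printf("%d\n", resolve(F).HasVector); // 0: vector is forced off
}
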
@@ -57,7 +63,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasInsertReferenceBitsMultiple(false),
HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
- HasEnhancedSort(false), HasDeflateConversion(false),
+ HasEnhancedSort(false), HasDeflateConversion(false), HasSoftFloat(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
@@ -68,9 +74,12 @@ bool SystemZSubtarget::enableSubRegLiveness() const {
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
- // PC32DBL accesses require the low bit to be clear. Note that a zero
- // value selects the default alignment and is therefore OK.
- if (GV->getAlignment() == 1)
+ // PC32DBL accesses require the low bit to be clear.
+ //
+ // FIXME: Explicitly check for functions: the datalayout is currently
+ // missing information about function pointers.
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GV->getPointerAlignment(DL) == 1 && !GV->getValueType()->isFunctionTy())
return false;
// For the small model, all locally-binding symbols are in range.
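
Annotation: a standalone sketch of the new eligibility test (illustrative; the rationale follows the comments in the hunk above):

#include <cstdio>

// PC32DBL relocations store a halfword-scaled (doubled) offset, so the
// target address must have its low bit clear. An explicit 1-byte pointer
// alignment cannot guarantee that; functions are exempted because the
// datalayout does not yet describe function-pointer alignment (the FIXME
// above).
bool canUsePC32DBL(unsigned PointerAlignInBytes, bool IsFunction) {
  return PointerAlignInBytes != 1 || IsFunction;
}

int main() {
  std::printf("%d\n", canUsePC32DBL(1, false)); // 0: use indirect access
}
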
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index fa3f65d93c91..4b49c37fe4e6 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -68,6 +68,7 @@ protected:
bool HasVectorPackedDecimalEnhancement;
bool HasEnhancedSort;
bool HasDeflateConversion;
+ bool HasSoftFloat;
private:
Triple TargetTriple;
@@ -239,6 +240,9 @@ public:
// Return true if the target has the deflate-conversion facility.
bool hasDeflateConversion() const { return HasDeflateConversion; }
+ // Return true if soft float should be used.
+ bool hasSoftFloat() const { return HasSoftFloat; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index f103812eb096..7cb7dca2ea28 100644
--- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -44,7 +44,9 @@
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
+#include "SystemZSubtarget.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -53,6 +55,7 @@
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
#include <deque>
#include <set>
@@ -72,6 +75,11 @@ public:
}
bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ }
+
private:
// Maps seen instructions that can be mapped to a TDC, values are
// (TDC operand, TDC mask, worthy flag) triples.
@@ -310,6 +318,12 @@ void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
}
bool SystemZTDCPass::runOnFunction(Function &F) {
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ if (TPC.getTM<TargetMachine>()
+ .getSubtarget<SystemZSubtarget>(F)
+ .hasSoftFloat())
+ return false;
+
ConvertedInsts.clear();
LogicOpsWorklist.clear();
PossibleJunk.clear();
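
Annotation: the TargetPassConfig plumbing added to SystemZTDC above is the standard idiom for a legacy IR pass that needs per-function subtarget features. A condensed, hypothetical skeleton of that idiom (FeatureGatedPass is an invented name; a sketch against the LLVM headers of this era, not a drop-in pass):

#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

namespace {
struct FeatureGatedPass : public FunctionPass {
  static char ID;
  FeatureGatedPass() : FunctionPass(ID) {}

  // Without this declaration, getAnalysis<TargetPassConfig>() below would
  // assert at runtime.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
  }

  bool runOnFunction(Function &F) override {
    auto &TPC = getAnalysis<TargetPassConfig>();
    const TargetMachine &TM = TPC.getTM<TargetMachine>();
    // A real pass would now query TM.getSubtarget<...>(F) for a feature
    // such as hasSoftFloat(), exactly as the hunk above does, and bail
    // out early when the transform does not apply.
    (void)TM;
    return false;
  }
};
char FeatureGatedPass::ID = 0;
} // end anonymous namespace
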
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index dfcdb5356485..3f467b200852 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -40,8 +40,10 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
// This is the case by default if CPU is z13 or later, and can be
// overridden via "[+-]vector" feature string elements.
bool VectorABI = true;
+ bool SoftFloat = false;
if (CPU.empty() || CPU == "generic" ||
- CPU == "z10" || CPU == "z196" || CPU == "zEC12")
+ CPU == "z10" || CPU == "z196" || CPU == "zEC12" ||
+ CPU == "arch8" || CPU == "arch9" || CPU == "arch10")
VectorABI = false;
SmallVector<StringRef, 3> Features;
@@ -51,9 +53,13 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
VectorABI = true;
if (Feature == "-vector")
VectorABI = false;
+ if (Feature == "soft-float" || Feature == "+soft-float")
+ SoftFloat = true;
+ if (Feature == "-soft-float")
+ SoftFloat = false;
}
- return VectorABI;
+ return VectorABI && !SoftFloat;
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -154,13 +160,46 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(RM),
getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT),
OL),
- TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
- Subtarget(TT, CPU, FS, *this) {
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
initAsmInfo();
}
SystemZTargetMachine::~SystemZTargetMachine() = default;
+const SystemZSubtarget *
+SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+  // FIXME: This is related to the code below that resets the target options:
+  // we need to know whether the soft-float flag is set on the function so
+  // we can enable it as a subtarget feature.
+ bool softFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+
+ if (softFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+
+ return I.get();
+}
+
namespace {
/// SystemZ Code Generator Pass Configuration Options.
@@ -183,6 +222,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
+ void addPreRegAlloc() override;
void addPostRewrite() override;
void addPostRegAlloc() override;
void addPreSched2() override;
@@ -214,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() {
return true;
}
+void SystemZPassConfig::addPreRegAlloc() {
+ addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine()));
+}
+
void SystemZPassConfig::addPostRewrite() {
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
}
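
Annotation: the new getSubtargetImpl computes a per-function key from the CPU and feature strings, appending "+soft-float" when the function carries use-soft-float="true", and memoizes one subtarget per distinct key. A self-contained toy of the caching scheme (illustrative names):

#include <cstdio>
#include <map>
#include <memory>
#include <string>

struct ToySubtarget {
  std::string Key;
};

std::map<std::string, std::unique_ptr<ToySubtarget>> SubtargetMap;

// Mirrors the hunk above: the soft-float function attribute is folded into
// the feature string, and the CPU + feature-string concatenation is the
// cache key.
ToySubtarget *getOrCreate(std::string CPU, std::string FS, bool UseSoftFloat) {
  if (UseSoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";
  auto &Slot = SubtargetMap[CPU + FS];
  if (!Slot)
    Slot = std::make_unique<ToySubtarget>(ToySubtarget{CPU + FS});
  return Slot.get();
}

int main() {
  ToySubtarget *A = getOrCreate("z15", "", true);
  ToySubtarget *B = getOrCreate("z15", "", true);
  std::printf("%d\n", A == B); // 1: the second lookup hits the cache
}
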
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index ac04a080f580..9ea03e104fc9 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -26,7 +26,8 @@ namespace llvm {
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SystemZSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<SystemZSubtarget>> SubtargetMap;
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -35,11 +36,11 @@ public:
CodeGenOpt::Level OL, bool JIT);
~SystemZTargetMachine() override;
- const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
-
- const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override;
+  // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget;
+  // subtargets are per-function entities based on the target-specific
+  // attributes of each function.
+ const SystemZSubtarget *getSubtargetImpl() const = delete;
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
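
Annotation: deleting the parameterless accessor turns every remaining call site into a compile-time error instead of silently returning a stale module-level subtarget. A minimal illustration of the idiom with toy types:

struct ToyTargetMachine {
  // Any call to the parameterless accessor is now a compile-time error:
  const int *getSubtargetImpl() const = delete;
  // Only the per-function lookup remains callable.
  const int *getSubtargetImpl(const char *FnName) const { return &Cached; }
  int Cached = 0;
};

// ToyTargetMachine{}.getSubtargetImpl();       // error: deleted function
// ToyTargetMachine{}.getSubtargetImpl("main"); // OK

int main() {
  ToyTargetMachine TM;
  return *TM.getSubtargetImpl("main"); // 0
}
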
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index acec3c533585..864200e5f71c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,7 +30,8 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,7 +64,8 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
}
int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -177,11 +179,12 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
break;
}
- return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -226,7 +229,7 @@ int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
break;
}
- return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
TargetTransformInfo::PopcntSupportKind
@@ -246,8 +249,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
for (auto &BB : L->blocks())
for (auto &I : *BB) {
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
- ImmutableCallSite CS(&I);
- if (const Function *F = CS.getCalledFunction()) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
if (isLoweredToCall(F))
HasCall = true;
if (F->getIntrinsicID() == Intrinsic::memcpy ||
@@ -259,7 +261,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
if (isa<StoreInst>(&I)) {
Type *MemAccessTy = I.getOperand(0)->getType();
- NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
+ TTI::TCK_RecipThroughput);
}
}
@@ -291,6 +294,10 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Force = true;
}
+void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) {
@@ -323,6 +330,23 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
return 0;
}
+unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const {
+  // Don't prefetch a loop with many far-apart accesses.
+ if (NumPrefetches > 16)
+ return UINT_MAX;
+
+ // Emit prefetch instructions for smaller strides in cases where we think
+ // the hardware prefetcher might not be able to keep up.
+ if (NumStridedMemAccesses > 32 &&
+ NumStridedMemAccesses == NumMemAccesses && !HasCall)
+ return 1;
+
+ return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
+}
+
bool SystemZTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
EVT VT = TLI->getValueType(DL, DataType);
return (VT.isScalarInteger() && TLI->isTypeLegal(VT));
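
Annotation: together with the larger prefetch distance (4500; see the header diff further below), getMinPrefetchStride above encodes three regimes: give up past 16 prefetch streams, prefetch every iteration for large purely-strided call-free loops, and otherwise demand a 2048-byte stride (8192 with miscellaneous-extensions-3). A toy restatement (illustrative):

#include <climits>
#include <cstdio>

unsigned minPrefetchStride(unsigned NumMemAccesses,
                           unsigned NumStridedMemAccesses,
                           unsigned NumPrefetches, bool HasCall,
                           bool HasMiscExt3) {
  if (NumPrefetches > 16)
    return UINT_MAX; // effectively disables software prefetching
  if (NumStridedMemAccesses > 32 &&
      NumStridedMemAccesses == NumMemAccesses && !HasCall)
    return 1;        // every strided access is worth prefetching
  return HasMiscExt3 ? 8192 : 2048;
}

int main() {
  std::printf("%u\n", minPrefetchStride(40, 40, 4, false, false)); // 1
}
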
@@ -341,18 +365,25 @@ static unsigned getScalarSizeInBits(Type *Ty) {
// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of
// 3.
static unsigned getNumVectorRegs(Type *Ty) {
- assert(Ty->isVectorTy() && "Expected vector type");
- unsigned WideBits = getScalarSizeInBits(Ty) * Ty->getVectorNumElements();
+ auto *VTy = cast<FixedVectorType>(Ty);
+ unsigned WideBits = getScalarSizeInBits(Ty) * VTy->getNumElements();
assert(WideBits > 0 && "Could not compute size of vector");
return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
}
int SystemZTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // TODO: Handle more cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+ Op2Info, Opd1PropInfo,
+ Opd2PropInfo, Args, CxtI);
+
// TODO: return a good value for BB-VECTORIZER that includes the
// immediate loads, which we do not want to count for the loop
// vectorizer, since they are hopefully hoisted out of the loop. This
@@ -391,10 +422,59 @@ int SystemZTTIImpl::getArithmeticInstrCost(
}
}
- if (Ty->isVectorTy()) {
- assert(ST->hasVector() &&
- "getArithmeticInstrCost() called with vector type.");
- unsigned VF = Ty->getVectorNumElements();
+ if (!Ty->isVectorTy()) {
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ // Give discount for some combined logical operations if supported.
+ if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+ if (Opcode == Instruction::Xor) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() &&
+ (I->getOpcode() == Instruction::And ||
+ I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Xor))
+ return 0;
+ }
+ }
+ else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+ return 0;
+ }
+ }
+ }
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1) {
+ if (ST->hasLoadStoreOnCond2())
+ return 5; // 2 * (li 0; loc 1); xor
+ return 7; // 2 * ipm sequences ; xor ; shift ; compare
+ }
+
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem || UnsignedDivRem)
+ return DivInstrCost;
+ }
+ else if (ST->hasVector()) {
+ auto *VTy = cast<FixedVectorType>(Ty);
+ unsigned VF = VTy->getNumElements();
unsigned NumVectors = getNumVectorRegs(Ty);
// These vector operations are custom handled, but are still supported
@@ -407,7 +487,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (DivRemConstPow2)
return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
if (DivRemConst)
- return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
// Temporary hack: disable high vectorization factors with integer
// division/remainder, which will get scalarized and handled with
@@ -429,8 +509,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
unsigned ScalarCost =
- getArithmeticInstrCost(Opcode, Ty->getScalarType());
- unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+ getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
if (VF == 2)
@@ -447,101 +527,51 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// There is no native support for FRem.
if (Opcode == Instruction::FRem) {
- unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);
// FIXME: VF 2 for float is currently just as expensive as for VF 4.
if (VF == 2 && ScalarBits == 32)
Cost *= 2;
return Cost;
}
}
- else { // Scalar:
- // These FP operations are supported with a dedicated instruction for
- // float, double and fp128 (base implementation assumes float generally
- // costs 2).
- if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
- Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
- return 1;
-
- // There is no native support for FRem.
- if (Opcode == Instruction::FRem)
- return LIBCALL_COST;
-
- // Give discount for some combined logical operations if supported.
- if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
- if (Opcode == Instruction::Xor) {
- for (const Value *A : Args) {
- if (const Instruction *I = dyn_cast<Instruction>(A))
- if (I->hasOneUse() &&
- (I->getOpcode() == Instruction::And ||
- I->getOpcode() == Instruction::Or ||
- I->getOpcode() == Instruction::Xor))
- return 0;
- }
- }
- else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
- for (const Value *A : Args) {
- if (const Instruction *I = dyn_cast<Instruction>(A))
- if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
- return 0;
- }
- }
- }
-
- // Or requires one instruction, although it has custom handling for i64.
- if (Opcode == Instruction::Or)
- return 1;
-
- if (Opcode == Instruction::Xor && ScalarBits == 1) {
- if (ST->hasLoadStoreOnCond2())
- return 5; // 2 * (li 0; loc 1); xor
- return 7; // 2 * ipm sequences ; xor ; shift ; compare
- }
-
- if (DivRemConstPow2)
- return (SignedDivRem ? SDivPow2Cost : 1);
- if (DivRemConst)
- return DivMulSeqCost;
- if (SignedDivRem || UnsignedDivRem)
- return DivInstrCost;
- }
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
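
Annotation: the discount in the relocated scalar branch above models the combined logical instructions that come with miscellaneous-extensions-3 (the NAND/NOR/NOT-XOR family): an Xor fed by a single-use And/Or/Xor, or an And/Or fed by a single-use Xor, lowers to one instruction, so the consuming operation is costed at 0. A toy restatement (illustrative):

#include <cstdio>

enum ToyOpcode { And, Or, Xor, Other };

int scalarLogicCost(ToyOpcode Opcode, ToyOpcode OperandOpcode,
                    bool OperandHasOneUse, bool HasMiscExt3) {
  if (HasMiscExt3 && OperandHasOneUse) {
    if (Opcode == Xor &&
        (OperandOpcode == And || OperandOpcode == Or || OperandOpcode == Xor))
      return 0; // folded into one combined instruction
    if ((Opcode == And || Opcode == Or) && OperandOpcode == Xor)
      return 0;
  }
  return 1;
}

int main() {
  std::printf("%d\n", scalarLogicCost(Xor, And, true, true)); // 0
}
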
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- assert (Tp->isVectorTy());
- assert (ST->hasVector() && "getShuffleCost() called.");
- unsigned NumVectors = getNumVectorRegs(Tp);
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
+ if (ST->hasVector()) {
+ unsigned NumVectors = getNumVectorRegs(Tp);
- // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
- // FP128 values are always in scalar registers, so there is no work
- // involved with a shuffle, except for broadcast. In that case register
- // moves are done with a single instruction per element.
- if (Tp->getScalarType()->isFP128Ty())
- return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
- switch (Kind) {
- case TargetTransformInfo::SK_ExtractSubvector:
- // ExtractSubvector Index indicates start offset.
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
- // Extracting a subvector from first index is a noop.
- return (Index == 0 ? 0 : NumVectors);
+ // Extracting a subvector from first index is a noop.
+ return (Index == 0 ? 0 : NumVectors);
- case TargetTransformInfo::SK_Broadcast:
- // Loop vectorizer calls here to figure out the extra cost of
- // broadcasting a loaded value to all elements of a vector. Since vlrep
- // loads and replicates with a single instruction, adjust the returned
- // value.
- return NumVectors - 1;
+ case TargetTransformInfo::SK_Broadcast:
+ // Loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
- default:
+ default:
- // SystemZ supports single instruction permutation / replication.
- return NumVectors;
+ // SystemZ supports single instruction permutation / replication.
+ return NumVectors;
+ }
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -564,8 +594,9 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
assert (SrcTy->isVectorTy() && DstTy->isVectorTy());
assert (SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
"Packing must reduce size of vector type.");
- assert (SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
- "Packing should not change number of elements.");
+ assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
+ cast<FixedVectorType>(DstTy)->getNumElements() &&
+ "Packing should not change number of elements.");
// TODO: Since fp32 is expanded, the extract cost should always be 0.
@@ -580,7 +611,7 @@ getVectorTruncCost(Type *SrcTy, Type *DstTy) {
unsigned Cost = 0;
unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
- unsigned VF = SrcTy->getVectorNumElements();
+ unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
for (unsigned P = 0; P < Log2Diff; ++P) {
if (NumParts > 1)
NumParts /= 2;
@@ -642,7 +673,7 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
// Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
// be either scalar or already vectorized with a same or lesser VF.
Type *ElTy = OpTy->getScalarType();
- return VectorType::get(ElTy, VF);
+ return FixedVectorType::get(ElTy, VF);
}
return nullptr;
@@ -653,8 +684,8 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
unsigned SystemZTTIImpl::
getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
const Instruction *I) {
- assert (Dst->isVectorTy());
- unsigned VF = Dst->getVectorNumElements();
+ auto *DstVTy = cast<FixedVectorType>(Dst);
+ unsigned VF = DstVTy->getNumElements();
unsigned Cost = 0;
// If we know the widths of the compared operands, get any cost of
// converting them to match Dst. Otherwise assume same widths.
@@ -668,14 +699,50 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
}
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
+ // FIXME: Can the logic below also be used for these cost kinds?
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+ int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
+ return BaseCost == 0 ? BaseCost : 1;
+ }
+
unsigned DstScalarBits = Dst->getScalarSizeInBits();
unsigned SrcScalarBits = Src->getScalarSizeInBits();
- if (Src->isVectorTy()) {
- assert (ST->hasVector() && "getCastInstrCost() called with vector type.");
- assert (Dst->isVectorTy());
- unsigned VF = Src->getVectorNumElements();
+ if (!Src->isVectorTy()) {
+ assert (!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+ if (SrcScalarBits >= 32 ||
+ (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+ return 1;
+ return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+ }
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ if (ST->hasLoadStoreOnCond2())
+ return 2; // li 0; loc 1
+
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+ Cost++;
+ return Cost;
+ }
+ }
+ else if (ST->hasVector()) {
+ auto *SrcVecTy = cast<FixedVectorType>(Src);
+ auto *DstVecTy = cast<FixedVectorType>(Dst);
+ unsigned VF = SrcVecTy->getNumElements();
unsigned NumDstVectors = getNumVectorRegs(Dst);
unsigned NumSrcVectors = getNumVectorRegs(Src);
@@ -720,7 +787,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// inserting and extracting the values. Base implementation does not
// realize float->int gets scalarized.
unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
+ Src->getScalarType(), CostKind);
unsigned TotCost = VF * ScalarCost;
bool NeedsInserts = true, NeedsExtracts = true;
// FP128 registers do not get inserted or extracted.
@@ -731,8 +798,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(Src, false, NeedsExtracts);
- TotCost += getScalarizationOverhead(Dst, NeedsInserts, false);
+ TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
+ TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
@@ -743,7 +810,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
if (Opcode == Instruction::FPTrunc) {
if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
- return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ return VF /*ldxbr/lexbr*/ +
+ getScalarizationOverhead(DstVecTy, true, false);
else // double -> float
return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
}
@@ -756,40 +824,11 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
return VF * 2;
}
// -> fp128. VF * lxdb/lxeb + extraction of elements.
- return VF + getScalarizationOverhead(Src, false, true);
+ return VF + getScalarizationOverhead(SrcVecTy, false, true);
}
}
- else { // Scalar
- assert (!Dst->isVectorTy());
-
- if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
- if (SrcScalarBits >= 32 ||
- (I != nullptr && isa<LoadInst>(I->getOperand(0))))
- return 1;
- return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
- }
- if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- Src->isIntegerTy(1)) {
- if (ST->hasLoadStoreOnCond2())
- return 2; // li 0; loc 1
-
- // This should be extension of a compare i1 result, which is done with
- // ipm and a varying sequence of instructions.
- unsigned Cost = 0;
- if (Opcode == Instruction::SExt)
- Cost = (DstScalarBits < 64 ? 3 : 4);
- if (Opcode == Instruction::ZExt)
- Cost = 3;
- Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
- if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
- // If operands of an fp-type was compared, this costs +1.
- Cost++;
- return Cost;
- }
- }
-
- return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I);
}
// Scalar i8 / i16 operations will typically be made after first extending
@@ -805,10 +844,38 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
}
int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, const Instruction *I) {
- if (ValTy->isVectorTy()) {
- assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
- unsigned VF = ValTy->getVectorNumElements();
+ Type *CondTy,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
+
+ if (!ValTy->isVectorTy()) {
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ // A loaded value compared with 0 with multiple users becomes Load and
+ // Test. The load is then not foldable, so return 0 cost for the ICmp.
+ unsigned ScalarBits = ValTy->getScalarSizeInBits();
+ if (I != nullptr && ScalarBits >= 32)
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
+ C->getZExtValue() == 0)
+ return 0;
+
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP - costs a conditional jump.
+ return 1; // Load On Condition / Select Register.
+ }
+ }
+ else if (ST->hasVector()) {
+ unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();
// Called with a compare instruction.
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
@@ -856,32 +923,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return getNumVectorRegs(ValTy) /*vsel*/ + PackCost;
}
}
- else { // Scalar
- switch (Opcode) {
- case Instruction::ICmp: {
- // A loaded value compared with 0 with multiple users becomes Load and
- // Test. The load is then not foldable, so return 0 cost for the ICmp.
- unsigned ScalarBits = ValTy->getScalarSizeInBits();
- if (I != nullptr && ScalarBits >= 32)
- if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
- if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
- C->getZExtValue() == 0)
- return 0;
-
- unsigned Cost = 1;
- if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
- Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2);
- return Cost;
- }
- case Instruction::Select:
- if (ValTy->isFloatingPointTy())
- return 4; // No load on condition for FP - costs a conditional jump.
- return 1; // Load On Condition / Select Register.
- }
- }
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind);
}
int SystemZTTIImpl::
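
Annotation: the zero-cost ICmp case above relies on SystemZ's Load-and-Test: a compare of a multi-use load against zero is folded into the load itself, which sets the condition code. A toy predicate capturing the exact guard conditions (illustrative):

#include <cstdio>

bool icmpIsFree(bool OperandIsLoad, bool LoadHasOneUse, bool SameBlock,
                unsigned ScalarBits, unsigned long long RHSValue) {
  // Multi-use load, same basic block as the compare, 32+ bit integer,
  // right-hand side is the constant 0.
  return OperandIsLoad && !LoadHasOneUse && SameBlock && ScalarBits >= 32 &&
         RHSValue == 0;
}

int main() {
  std::printf("%d\n", icmpIsFree(true, false, true, 64, 0)); // 1: free
}
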
@@ -995,9 +1038,14 @@ static bool isBswapIntrinsicCall(const Value *V) {
int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
+ // TODO: Handle other cost kinds.
+ if (CostKind != TTI::TCK_RecipThroughput)
+ return 1;
+
if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
// Store the load or its truncated or extended value in FoldedValue.
const Instruction *FoldedValue = nullptr;
@@ -1058,16 +1106,13 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// needed for using / defining the vector operands. The SystemZ version does
// roughly the same but bases the computations on vector permutations
// instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond,
- bool UseMaskForGaps) {
+int SystemZTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
@@ -1075,7 +1120,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
// Return the ceiling of dividing A by B.
auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
- unsigned NumElts = VecTy->getVectorNumElements();
+ unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
unsigned VF = NumElts / Factor;
unsigned NumEltsPerVecReg = (128U / getScalarSizeInBits(VecTy));
@@ -1125,22 +1170,10 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
return -1;
}
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF, unsigned VF) {
- int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
- if (Cost != -1)
- return Cost;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
-}
-
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF,
- unsigned ScalarizationCostPassed) {
- int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
if (Cost != -1)
return Cost;
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys,
- FMF, ScalarizationCostPassed);
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
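
Annotation: a recurring pattern across this file is the new TTI::TargetCostKind parameter: the SystemZ heuristics are tuned for reciprocal throughput and defer to the base implementation (or a flat cost) for the other kinds. A toy of that dispatch shape (illustrative):

#include <cstdio>

enum class ToyCostKind { RecipThroughput, Latency, CodeSize, SizeAndLatency };

int instrCost(ToyCostKind K, int TunedThroughputCost, int BaseCost) {
  if (K != ToyCostKind::RecipThroughput)
    return BaseCost;          // defer to the generic model
  return TunedThroughputCost; // SystemZ-specific tuning applies
}

int main() {
  std::printf("%d\n", instrCost(ToyCostKind::CodeSize, 0, 1)); // 1
}
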
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index bc4d066881c1..7f8f7f6f923f 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -38,17 +38,21 @@ public:
unsigned getInliningThresholdMultiplier() { return 3; }
- int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
- int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ Type *Ty, TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2);
/// @}
@@ -60,8 +64,12 @@ public:
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize() const override { return 256; }
- unsigned getPrefetchDistance() const override { return 2000; }
- unsigned getMinPrefetchStride() const override { return 2048; }
+ unsigned getPrefetchDistance() const override { return 4500; }
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override;
+ bool enableWritePrefetching() const override { return true; }
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool prefersVectorizedAddressing() { return false; }
@@ -71,40 +79,39 @@ public:
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
const Instruction *I);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr);
-
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false);
-
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1);
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
/// @}
};