aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-05-27 18:44:32 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-05-27 18:44:32 +0000
commit5a5ac124e1efaf208671f01c46edb15f29ed2a0b (patch)
treea6140557876943cdd800ee997c9317283394b22c /lib/Target/SystemZ
parentf03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (diff)
Notes
Diffstat (limited to 'lib/Target/SystemZ')
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp311
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt2
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp118
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp118
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h9
-rw-r--r--lib/Target/SystemZ/LLVMBuild.txt2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp10
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp59
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h3
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp52
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp60
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h12
-rw-r--r--lib/Target/SystemZ/SystemZ.h27
-rw-r--r--lib/Target/SystemZ/SystemZ.td1
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp135
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h10
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.h61
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td42
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.h8
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h2
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp230
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp2510
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h215
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td20
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td864
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp70
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h15
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td85
-rw-r--r--lib/Target/SystemZ/SystemZInstrVector.td1097
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp143
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h8
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td137
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td278
-rw-r--r--lib/Target/SystemZ/SystemZPatterns.td14
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td46
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h6
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td147
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp47
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp139
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp19
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h24
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp72
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h5
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp258
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h78
50 files changed, 7042 insertions, 551 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index cb528db9db51c..b721def54e126 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -39,13 +39,17 @@ enum RegisterKind {
ADDR64Reg,
FP32Reg,
FP64Reg,
- FP128Reg
+ FP128Reg,
+ VR32Reg,
+ VR64Reg,
+ VR128Reg
};
enum MemoryKind {
BDMem,
BDXMem,
- BDLMem
+ BDLMem,
+ BDVMem
};
class SystemZOperand : public MCParsedAsmOperand {
@@ -57,6 +61,7 @@ private:
KindReg,
KindAccessReg,
KindImm,
+ KindImmTLS,
KindMem
};
@@ -84,34 +89,42 @@ private:
};
// Base + Disp + Index, where Base and Index are LLVM registers or 0.
- // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
- // Length is the operand length for D(L,B)-style operands, otherwise
- // it is null.
+ // MemKind says what type of memory this is and RegKind says what type
+ // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand
+ // length for D(L,B)-style operands, otherwise it is null.
struct MemOp {
- unsigned Base : 8;
- unsigned Index : 8;
- unsigned RegKind : 8;
- unsigned Unused : 8;
+ unsigned Base : 12;
+ unsigned Index : 12;
+ unsigned MemKind : 4;
+ unsigned RegKind : 4;
const MCExpr *Disp;
const MCExpr *Length;
};
+ // Imm is an immediate operand, and Sym is an optional TLS symbol
+ // for use with a __tls_get_offset marker relocation.
+ struct ImmTLSOp {
+ const MCExpr *Imm;
+ const MCExpr *Sym;
+ };
+
union {
TokenOp Token;
RegOp Reg;
unsigned AccessReg;
const MCExpr *Imm;
+ ImmTLSOp ImmTLS;
MemOp Mem;
};
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
- Inst.addOperand(MCOperand::CreateImm(0));
+ Inst.addOperand(MCOperand::createImm(0));
else if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ Inst.addOperand(MCOperand::createExpr(Expr));
}
public:
@@ -149,10 +162,11 @@ public:
return Op;
}
static std::unique_ptr<SystemZOperand>
- createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp,
- unsigned Index, const MCExpr *Length, SMLoc StartLoc,
- SMLoc EndLoc) {
+ createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
+ const MCExpr *Disp, unsigned Index, const MCExpr *Length,
+ SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
+ Op->Mem.MemKind = MemKind;
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
@@ -160,6 +174,14 @@ public:
Op->Mem.Length = Length;
return Op;
}
+ static std::unique_ptr<SystemZOperand>
+ createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
+ Op->ImmTLS.Imm = Imm;
+ Op->ImmTLS.Sym = Sym;
+ return Op;
+ }
// Token operands
bool isToken() const override {
@@ -200,24 +222,40 @@ public:
return Imm;
}
+ // Immediate operands with optional TLS symbol.
+ bool isImmTLS() const {
+ return Kind == KindImmTLS;
+ }
+
// Memory operands.
bool isMem() const override {
return Kind == KindMem;
}
- bool isMem(RegisterKind RegKind, MemoryKind MemKind) const {
+ bool isMem(MemoryKind MemKind) const {
return (Kind == KindMem &&
- Mem.RegKind == RegKind &&
- (MemKind == BDXMem || !Mem.Index) &&
- (MemKind == BDLMem) == (Mem.Length != nullptr));
+ (Mem.MemKind == MemKind ||
+ // A BDMem can be treated as a BDXMem in which the index
+ // register field is 0.
+ (Mem.MemKind == BDMem && MemKind == BDXMem)));
+ }
+ bool isMem(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind) && Mem.RegKind == RegKind;
}
- bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
- return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
+ bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
}
- bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const {
- return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287);
+ bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
}
bool isMemDisp12Len8(RegisterKind RegKind) const {
- return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100);
+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100);
+ }
+ void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDVMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
}
// Override MCParsedAsmOperand.
@@ -229,12 +267,12 @@ public:
// to an instruction.
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(getReg()));
}
void addAccessRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
assert(Kind == KindAccessReg && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateImm(AccessReg));
+ Inst.addOperand(MCOperand::createImm(AccessReg));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
@@ -242,24 +280,31 @@ public:
}
void addBDAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands");
- assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
}
void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands");
- assert(Kind == KindMem && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDXMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
- Inst.addOperand(MCOperand::CreateReg(Mem.Index));
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
}
void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands");
- assert(Kind == KindMem && "Invalid operand type");
- Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ assert(isMem(BDLMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
addExpr(Inst, Mem.Length);
}
+ void addImmTLSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands");
+ assert(Kind == KindImmTLS && "Invalid operand type");
+ addExpr(Inst, ImmTLS.Imm);
+ if (ImmTLS.Sym)
+ addExpr(Inst, ImmTLS.Sym);
+ }
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
@@ -273,17 +318,26 @@ public:
bool isFP32() const { return isReg(FP32Reg); }
bool isFP64() const { return isReg(FP64Reg); }
bool isFP128() const { return isReg(FP128Reg); }
- bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); }
- bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); }
- bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); }
- bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); }
- bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); }
- bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); }
+ bool isVR32() const { return isReg(VR32Reg); }
+ bool isVR64() const { return isReg(VR64Reg); }
+ bool isVF128() const { return false; }
+ bool isVR128() const { return isReg(VR128Reg); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
+ bool isU1Imm() const { return isImm(0, 1); }
+ bool isU2Imm() const { return isImm(0, 3); }
+ bool isU3Imm() const { return isImm(0, 7); }
bool isU4Imm() const { return isImm(0, 15); }
bool isU6Imm() const { return isImm(0, 63); }
bool isU8Imm() const { return isImm(0, 255); }
bool isS8Imm() const { return isImm(-128, 127); }
+ bool isU12Imm() const { return isImm(0, 4095); }
bool isU16Imm() const { return isImm(0, 65535); }
bool isS16Imm() const { return isImm(-32768, 32767); }
bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
@@ -300,6 +354,7 @@ private:
enum RegisterGroup {
RegGR,
RegFP,
+ RegV,
RegAccess
};
struct Register {
@@ -318,12 +373,15 @@ private:
RegisterKind Kind);
bool parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, const MCExpr *&Length,
+ unsigned &Index, bool &IsVector, const MCExpr *&Length,
const unsigned *Regs, RegisterKind RegKind);
OperandMatchResultTy parseAddress(OperandVector &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind);
+ MemoryKind MemKind, const unsigned *Regs,
+ RegisterKind RegKind);
+
+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal, bool AllowTLS);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -382,26 +440,45 @@ public:
OperandMatchResultTy parseFP128(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
}
+ OperandMatchResultTy parseVR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
+ }
+ OperandMatchResultTy parseVR64(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
+ }
+ OperandMatchResultTy parseVF128(OperandVector &Operands) {
+ llvm_unreachable("Shouldn't be used as an operand");
+ }
+ OperandMatchResultTy parseVR128(OperandVector &Operands) {
+ return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
+ }
OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
+ return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
}
OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
+ return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
+ return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
+ return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
+ OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
}
OperandMatchResultTy parseAccessReg(OperandVector &Operands);
- OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal);
OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
- return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
}
OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
- return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
+ }
+ OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true);
+ }
+ OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
} // end anonymous namespace
@@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
Reg.Group = RegGR;
else if (Prefix == 'f' && Reg.Num < 16)
Reg.Group = RegFP;
+ else if (Prefix == 'v' && Reg.Num < 32)
+ Reg.Group = RegV;
else if (Prefix == 'a' && Reg.Num < 16)
Reg.Group = RegAccess;
else
@@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
// Regs maps asm register numbers to LLVM register numbers and RegKind
// says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, const MCExpr *&Length,
- const unsigned *Regs,
+ unsigned &Index, bool &IsVector,
+ const MCExpr *&Length, const unsigned *Regs,
RegisterKind RegKind) {
// Parse the displacement, which must always be present.
if (getParser().parseExpression(Disp))
@@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse the optional base and index.
Index = 0;
Base = 0;
+ IsVector = false;
Length = nullptr;
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
@@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
if (getLexer().is(AsmToken::Percent)) {
// Parse the first register and decide whether it's a base or an index.
Register Reg;
- if (parseRegister(Reg, RegGR, Regs, RegKind))
+ if (parseRegister(Reg))
return true;
- if (getLexer().is(AsmToken::Comma))
- Index = Reg.Num;
- else
- Base = Reg.Num;
+ if (Reg.Group == RegV) {
+ // A vector index register. The base register is optional.
+ IsVector = true;
+ Index = SystemZMC::VR128Regs[Reg.Num];
+ } else if (Reg.Group == RegGR) {
+ if (Reg.Num == 0)
+ return Error(Reg.StartLoc, "%r0 used in an address");
+ // If the are two registers, the first one is the index and the
+ // second is the base.
+ if (getLexer().is(AsmToken::Comma))
+ Index = Regs[Reg.Num];
+ else
+ Base = Regs[Reg.Num];
+ } else
+ return Error(Reg.StartLoc, "invalid address register");
} else {
// Parse the length.
if (getParser().parseExpression(Length))
@@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs,
- RegisterKind RegKind, MemoryKind MemKind) {
+SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
+ const unsigned *Regs, RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
+ bool IsVector;
const MCExpr *Disp;
const MCExpr *Length;
- if (parseAddress(Base, Disp, Index, Length, Regs, RegKind))
+ if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind))
return MatchOperand_ParseFail;
- if (Index && MemKind != BDXMem)
- {
- Error(StartLoc, "invalid use of indexed addressing");
- return MatchOperand_ParseFail;
- }
+ if (IsVector && MemKind != BDVMem) {
+ Error(StartLoc, "invalid use of vector addressing");
+ return MatchOperand_ParseFail;
+ }
- if (Length && MemKind != BDLMem)
- {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
+ if (!IsVector && MemKind == BDVMem) {
+ Error(StartLoc, "vector index required in address");
+ return MatchOperand_ParseFail;
+ }
- if (!Length && MemKind == BDLMem)
- {
- Error(StartLoc, "missing length in address");
- return MatchOperand_ParseFail;
- }
+ if (Index && MemKind != BDXMem && MemKind != BDVMem) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Length && MemKind != BDLMem) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+
+ if (!Length && MemKind == BDLMem) {
+ Error(StartLoc, "missing length in address");
+ return MatchOperand_ParseFail;
+ }
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
- Length, StartLoc, EndLoc));
+ Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
+ Index, Length, StartLoc,
+ EndLoc));
return MatchOperand_Success;
}
@@ -589,6 +689,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = SystemZMC::GR64Regs[Reg.Num];
else if (Reg.Group == RegFP)
RegNo = SystemZMC::FP64Regs[Reg.Num];
+ else if (Reg.Group == RegV)
+ RegNo = SystemZMC::VR128Regs[Reg.Num];
else
// FIXME: Access registers aren't modelled as LLVM registers yet.
return Error(Reg.StartLoc, "invalid operand for instruction");
@@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
// so we treat any plain expression as an immediate.
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
+ bool IsVector;
const MCExpr *Expr, *Length;
- if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg))
+ if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs,
+ ADDR64Reg))
return true;
SMLoc EndLoc =
@@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(OperandVector &Operands) {
SystemZAsmParser::OperandMatchResultTy
SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
- int64_t MaxVal) {
+ int64_t MaxVal, bool AllowTLS) {
MCContext &Ctx = getContext();
MCStreamer &Out = getStreamer();
const MCExpr *Expr;
@@ -759,16 +863,61 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
Error(StartLoc, "offset out of range");
return MatchOperand_ParseFail;
}
- MCSymbol *Sym = Ctx.CreateTempSymbol();
+ MCSymbol *Sym = Ctx.createTempSymbol();
Out.EmitLabel(Sym);
const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
Ctx);
Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx);
}
+ // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
+ const MCExpr *Sym = nullptr;
+ if (AllowTLS && getLexer().is(AsmToken::Colon)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ StringRef Name = Parser.getTok().getString();
+ if (Name == "tls_gdcall")
+ Kind = MCSymbolRefExpr::VK_TLSGD;
+ else if (Name == "tls_ldcall")
+ Kind = MCSymbolRefExpr::VK_TLSLDM;
+ else {
+ Error(Parser.getTok().getLoc(), "unknown TLS tag");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected token");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Identifier = Parser.getTok().getString();
+ Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier),
+ Kind, Ctx);
+ Parser.Lex();
+ }
+
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+
+ if (AllowTLS)
+ Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym,
+ StartLoc, EndLoc));
+ else
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+
return MatchOperand_Success;
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 41a614d9d151a..336f037bb7330 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen
SystemZISelDAGToDAG.cpp
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
+ SystemZLDCleanup.cpp
SystemZLongBranch.cpp
SystemZMachineFunctionInfo.cpp
SystemZMCInstLower.cpp
@@ -28,6 +29,7 @@ add_llvm_target(SystemZCodeGen
SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
+ SystemZTargetTransformInfo.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 23173bfbd91bb..bf67b75d53377 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~SystemZDisassembler() {}
+ ~SystemZDisassembler() override {}
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -47,74 +47,94 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned *Regs) {
- assert(RegNo < 16 && "Invalid register");
+ const unsigned *Regs, unsigned Size) {
+ assert(RegNo < Size && "Invalid register");
RegNo = Regs[RegNo];
if (RegNo == 0)
return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(RegNo));
+ Inst.addOperand(MCOperand::createReg(RegNo));
return MCDisassembler::Success;
}
static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16);
}
static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16);
}
static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16);
}
static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16);
}
static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16);
}
static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16);
+}
+
+static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32);
+}
+
+static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32);
+}
+
+static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
}
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
template<unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
@@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm,
return decodeUImmOperand<4>(Inst, Imm);
}
+static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<1>(Inst, Imm);
+}
+
+static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<2>(Inst, Imm);
+}
+
+static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<3>(Inst, Imm);
+}
+
static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, const void *Decoder) {
return decodeUImmOperand<4>(Inst, Imm);
@@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
return decodeUImmOperand<8>(Inst, Imm);
}
+static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address, const void *Decoder) {
+ return decodeUImmOperand<12>(Inst, Imm);
+}
+
static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, const void *Decoder) {
return decodeUImmOperand<16>(Inst, Imm);
@@ -168,7 +208,7 @@ template<unsigned N>
static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address) {
assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address));
return MCDisassembler::Success;
}
@@ -189,8 +229,8 @@ static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = Field >> 12;
uint64_t Disp = Field & 0xfff;
assert(Base < 16 && "Invalid BDAddr12");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
return MCDisassembler::Success;
}
@@ -199,8 +239,8 @@ static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = Field >> 20;
uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
assert(Base < 16 && "Invalid BDAddr20");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
return MCDisassembler::Success;
}
@@ -210,9 +250,9 @@ static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 12) & 0xf;
uint64_t Disp = Field & 0xfff;
assert(Index < 16 && "Invalid BDXAddr12");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
- Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
return MCDisassembler::Success;
}
@@ -222,9 +262,9 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 20) & 0xf;
uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
assert(Index < 16 && "Invalid BDXAddr20");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp)));
- Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index]));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
+ Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
return MCDisassembler::Success;
}
@@ -234,9 +274,21 @@ static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
uint64_t Base = (Field >> 12) & 0xf;
uint64_t Disp = Field & 0xfff;
assert(Length < 256 && "Invalid BDLAddr12Len8");
- Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::CreateImm(Disp));
- Inst.addOperand(MCOperand::CreateImm(Length + 1));
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createImm(Length + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Index = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Index < 32 && "Invalid BDVAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index]));
return MCDisassembler::Success;
}
@@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
}
+static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
#include "SystemZGenDisassemblerTables.inc"
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index d2ba9b6f54c35..373ddfa7e2577 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -9,7 +9,10 @@
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -21,13 +24,17 @@ using namespace llvm;
void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
unsigned Index, raw_ostream &O) {
O << Disp;
- if (Base) {
+ if (Base || Index) {
O << '(';
- if (Index)
- O << '%' << getRegisterName(Index) << ',';
- O << '%' << getRegisterName(Base) << ')';
- } else
- assert(!Index && "Shouldn't have an index without a base");
+ if (Index) {
+ O << '%' << getRegisterName(Index);
+ if (Base)
+ O << ',';
+ }
+ if (Base)
+ O << '%' << getRegisterName(Base);
+ O << ')';
+ }
}
void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
@@ -42,7 +49,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
}
void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -51,60 +59,78 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << '%' << getRegisterName(RegNo);
}
-void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
+template<unsigned N>
+void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<4>(Value) && "Invalid u4imm argument");
+ assert(isUInt<N>(Value) && "Invalid uimm argument");
O << Value;
}
-void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
+template<unsigned N>
+void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<6>(Value) && "Invalid u6imm argument");
+ assert(isInt<N>(Value) && "Invalid simm argument");
O << Value;
}
+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<1>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<2>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<3>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<4>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<6>(MI, OpNum, O);
+}
+
void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<8>(Value) && "Invalid s8imm argument");
- O << Value;
+ printSImmOperand<8>(MI, OpNum, O);
}
void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<8>(Value) && "Invalid u8imm argument");
- O << Value;
+ printUImmOperand<8>(MI, OpNum, O);
+}
+
+void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<12>(MI, OpNum, O);
}
void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<16>(Value) && "Invalid s16imm argument");
- O << Value;
+ printSImmOperand<16>(MI, OpNum, O);
}
void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<16>(Value) && "Invalid u16imm argument");
- O << Value;
+ printUImmOperand<16>(MI, OpNum, O);
}
void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isInt<32>(Value) && "Invalid s32imm argument");
- O << Value;
+ printSImmOperand<32>(MI, OpNum, O);
}
void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
- assert(isUInt<32>(Value) && "Invalid u32imm argument");
- O << Value;
+ printUImmOperand<32>(MI, OpNum, O);
}
void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
@@ -124,6 +150,29 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
O << *MO.getExpr();
}
+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ // Output the PC-relative operand.
+ printPCRelOperand(MI, OpNum, O);
+
+ // Output the TLS marker if present.
+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
+ const MCOperand &MO = MI->getOperand(OpNum + 1);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
+ switch (refExp.getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ O << ":tls_gdcall:";
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ O << ":tls_ldcall:";
+ break;
+ default:
+ llvm_unreachable("Unexpected symbol kind");
+ }
+ O << refExp.getSymbol().getName();
+ }
+}
+
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printOperand(MI->getOperand(OpNum), O);
@@ -153,6 +202,13 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
O << ')';
}
+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
raw_ostream &O) {
static const char *const CondNames[] = {
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 753903cf06d55..847b6962e6f20 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
// Print various types of operand.
@@ -47,15 +48,21 @@ private:
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 542aaee773583..6f8431db7b11c 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 6e7268de55c16..1c3887ab54560 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
switch (unsigned(Kind)) {
case SystemZ::FK_390_PC16DBL:
case SystemZ::FK_390_PC32DBL:
- case SystemZ::FK_390_PLT16DBL:
- case SystemZ::FK_390_PLT32DBL:
return (int64_t)Value / 2;
+
+ case SystemZ::FK_390_TLS_CALL:
+ return 0;
}
llvm_unreachable("Unknown fixup kind!");
@@ -61,7 +62,7 @@ public:
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createSystemZObjectWriter(OS, OSABI);
}
};
@@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
{ "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ { "FK_390_TLS_CALL", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 27b4bd855b3e4..c9290c1922d32 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -16,7 +16,9 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;
@@ -32,10 +34,10 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() {}
+ ~SystemZMCCodeEmitter() override {}
// OVerride MCCodeEmitter.
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -70,37 +72,55 @@ private:
uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
// Operand OpNum of MI needs a PC-relative fixup of kind Kind at
// Offset bytes from the start of MI. Add the fixup to Fixups
// and return the in-place addend, which since we're a RELA target
- // is always 0.
+ // is always 0. If AllowTLS is true and optional operand OpNum + 1
+ // is present, also emit a TLS call fixup for it.
uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
- unsigned Kind, int64_t Offset) const;
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const;
uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, false);
}
uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, false);
+ }
+ uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 2, true);
+ }
+ uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC32DBL, 2, true);
}
};
} // end anonymous namespace
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &MCSTI,
MCContext &Ctx) {
return new SystemZMCCodeEmitter(MCII, Ctx);
}
void SystemZMCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
@@ -178,10 +198,22 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
return (Len << 16) | (Base << 12) | Disp;
}
+uint64_t SystemZMCCodeEmitter::
+getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
+ return (Index << 16) | (Base << 12) | Disp;
+}
+
uint64_t
SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
- unsigned Kind, int64_t Offset) const {
+ unsigned Kind, int64_t Offset,
+ bool AllowTLS) const {
const MCOperand &MO = MI.getOperand(OpNum);
const MCExpr *Expr;
if (MO.isImm())
@@ -197,7 +229,14 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
}
}
- Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
+ Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind));
+
+ // Output the fixup for the TLS marker if present.
+ if (AllowTLS && OpNum + 1 < MI.getNumOperands()) {
+ const MCOperand &MOTLS = MI.getOperand(OpNum + 1);
+ Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(),
+ (MCFixupKind)SystemZ::FK_390_TLS_CALL));
+ }
return 0;
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 52a8d1d6600b3..229ab5dc86fb0 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -18,8 +18,7 @@ enum FixupKind {
// These correspond directly to R_390_* relocations.
FK_390_PC16DBL = FirstTargetFixupKind,
FK_390_PC32DBL,
- FK_390_PLT16DBL,
- FK_390_PLT32DBL,
+ FK_390_TLS_CALL,
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index c6a1816588963..ee1af023769e8 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
- virtual ~SystemZObjectWriter();
+ ~SystemZObjectWriter() override;
protected:
// Override MCELFObjectTargetWriter.
@@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned Kind) {
case FK_Data_8: return ELF::R_390_PC64;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
- case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL;
- case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL;
}
llvm_unreachable("Unsupported PC-relative address");
}
@@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned Kind) {
llvm_unreachable("Unsupported absolute address");
}
+// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDOReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDO32;
+ case FK_Data_8: return ELF::R_390_TLS_LDO64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
+static unsigned getTLSLDMReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LDM32;
+ case FK_Data_8: return ELF::R_390_TLS_LDM64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
+static unsigned getTLSGDReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_GD32;
+ case FK_Data_8: return ELF::R_390_TLS_GD64;
+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
// Return the PLT relocation counterpart of MCFixupKind Kind.
static unsigned getPLTReloc(unsigned Kind) {
switch (Kind) {
@@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
return getTLSLEReloc(Kind);
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+ return ELF::R_390_TLS_IEENT;
+ llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
+
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
+ return getTLSLDOReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
+ return getTLSLDMReloc(Kind);
+
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
+ return getTLSGDReloc(Kind);
+
case MCSymbolRefExpr::VK_GOT:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_GOTENT;
@@ -108,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 6e82b6d98ae43..8c2075afe505e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -12,6 +12,7 @@
#include "SystemZMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
@@ -76,6 +77,39 @@ const unsigned SystemZMC::FP128Regs[16] = {
SystemZ::F12Q, SystemZ::F13Q, 0, 0
};
+const unsigned SystemZMC::VR32Regs[32] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S,
+ SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S,
+ SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S,
+ SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S
+};
+
+const unsigned SystemZMC::VR64Regs[32] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
+ SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D,
+ SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D,
+ SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D,
+ SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D
+};
+
+const unsigned SystemZMC::VR128Regs[32] = {
+ SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3,
+ SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7,
+ SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11,
+ SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15,
+ SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19,
+ SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23,
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31
+};
+
unsigned SystemZMC::getFirstReg(unsigned Reg) {
static unsigned Map[SystemZ::NUM_TARGET_REGS];
static bool Initialized = false;
@@ -85,10 +119,13 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
Map[GRH32Regs[I]] = I;
Map[GR64Regs[I]] = I;
Map[GR128Regs[I]] = I;
- Map[FP32Regs[I]] = I;
- Map[FP64Regs[I]] = I;
Map[FP128Regs[I]] = I;
}
+ for (unsigned I = 0; I < 32; ++I) {
+ Map[VR32Regs[I]] = I;
+ Map[VR64Regs[I]] = I;
+ Map[VR128Regs[I]] = I;
+ }
}
assert(Reg < SystemZ::NUM_TARGET_REGS);
return Map[Reg];
@@ -168,27 +205,18 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CM = CodeModel::Small;
else if (CM == CodeModel::JITDefault)
CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *
-createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
-}
-
extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
@@ -221,8 +249,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
createSystemZMCInstPrinter);
-
- // Register the MCObjectStreamer;
- TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
- createSystemZMCObjectStreamer);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 5eb6526a5c000..36ea750ec8dc7 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSystemZTarget;
@@ -48,6 +49,9 @@ extern const unsigned GR128Regs[16];
extern const unsigned FP32Regs[16];
extern const unsigned FP64Regs[16];
extern const unsigned FP128Regs[16];
+extern const unsigned VR32Regs[32];
+extern const unsigned VR64Regs[32];
+extern const unsigned VR128Regs[32];
// Return the 0-based number of the first architectural register that
// contains the given LLVM register. E.g. R1D -> 1.
@@ -67,18 +71,22 @@ inline unsigned getRegAsGR32(unsigned Reg) {
inline unsigned getRegAsGRH32(unsigned Reg) {
return GRH32Regs[getFirstReg(Reg)];
}
+
+// Return the given register as a VR128.
+inline unsigned getRegAsVR128(unsigned Reg) {
+ return VR128Regs[getFirstReg(Reg)];
+}
} // end namespace SystemZMC
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index c8b95b2b2ca71..cafe2c5948c44 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,25 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
+// Condition-code mask assignments for vector comparisons (and similar
+// operations).
+const unsigned CCMASK_VCMP_ALL = CCMASK_0;
+const unsigned CCMASK_VCMP_MIXED = CCMASK_1;
+const unsigned CCMASK_VCMP_NONE = CCMASK_3;
+const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
@@ -75,6 +94,13 @@ const unsigned IPM_CC = 28;
const unsigned PFD_READ = 1;
const unsigned PFD_WRITE = 2;
+// Number of bits in a vector register.
+const unsigned VectorBits = 128;
+
+// Number of bytes in a vector register (and consequently the number of
+// bytes in a general permute vector).
+const unsigned VectorBytes = VectorBits / 8;
+
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
@@ -111,6 +137,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 5f829034902f4..d4d636d3479c5 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -40,6 +40,7 @@ include "SystemZOperands.td"
include "SystemZPatterns.td"
include "SystemZInstrFormats.td"
include "SystemZInstrInfo.td"
+include "SystemZInstrVector.td"
include "SystemZInstrFP.td"
def SystemZInstrInfo : InstrInfo {}
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index f4f3ec7a97332..a0d079fcc3598 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
.addImm(MI->getOperand(5).getImm());
}
+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
+ StringRef Name = "__tls_get_offset";
+ return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_PLT,
+ Context);
+}
+
+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name),
+ MCSymbolRefExpr::VK_None,
+ Context);
+}
+
+// MI loads the high part of a vector from memory. Return an instruction
+// that uses replicating vector load Opcode to do the same thing.
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+}
+
+// MI stores the high part of a vector to memory. Return an instruction
+// that uses elemental vector store Opcode to do the same thing.
+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg())
+ .addImm(0);
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
@@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
break;
+ case SystemZ::TLS_GDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD));
+ break;
+
+ case SystemZ::TLS_LDCALL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(getTLSGetOffset(MF->getContext()))
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM));
+ break;
+
+ case SystemZ::GOT:
+ LoweredMI = MCInstBuilder(SystemZ::LARL)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(getGlobalOffsetTable(MF->getContext()));
+ break;
+
case SystemZ::IILF64:
LoweredMI = MCInstBuilder(SystemZ::IILF)
.addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
@@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
break;
+ case SystemZ::VLVGP32:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg()))
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
+ break;
+
+ case SystemZ::VLR32:
+ case SystemZ::VLR64:
+ LoweredMI = MCInstBuilder(SystemZ::VLR)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
+ break;
+
+ case SystemZ::VL32:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
+ break;
+
+ case SystemZ::VL64:
+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
+ break;
+
+ case SystemZ::VST32:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
+ break;
+
+ case SystemZ::VST64:
+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
+ break;
+
+ case SystemZ::LFER:
+ LoweredMI = MCInstBuilder(SystemZ::VLGVF)
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()))
+ .addReg(0).addImm(0);
+ break;
+
+ case SystemZ::LEFR:
+ LoweredMI = MCInstBuilder(SystemZ::VLVGF)
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(0).addImm(0);
+ break;
+
#define LOWER_LOW(NAME) \
case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
@@ -152,7 +252,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#undef LOWER_HIGH
case SystemZ::Serialize:
- if (Subtarget->hasFastSerialization())
+ if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization())
LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
.addImm(14).addReg(SystemZ::R0D);
else
@@ -164,7 +264,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
Lower.lower(MI, LoweredMI);
break;
}
- EmitToStreamer(OutStreamer, LoweredMI);
+ EmitToStreamer(*OutStreamer, LoweredMI);
}
// Convert a SystemZ-specific constant pool modifier into the associated
@@ -172,6 +272,9 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
switch (Modifier) {
+ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
+ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM;
+ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF;
case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
}
llvm_unreachable("Invalid SystemCPModifier!");
@@ -185,10 +288,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size =
- TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
- OutStreamer.EmitValue(Expr, Size);
+ OutStreamer->EmitValue(Expr, Size);
}
bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
@@ -219,29 +321,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetELF()) {
- auto &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index 64672792a8765..7f6e823729dc7 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -22,14 +22,9 @@ class Module;
class raw_ostream;
class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
-private:
- const SystemZSubtarget *Subtarget;
-
public:
- SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<SystemZSubtarget>();
- }
+ SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
// Override AsmPrinter.
const char *getPassName() const override {
@@ -43,7 +38,6 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
- void EmitEndOfAsmFile(Module &M) override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index 71605ac112685..bff0706618aa8 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -10,6 +10,9 @@
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
namespace llvm {
namespace SystemZ {
const unsigned NumArgGPRs = 5;
@@ -18,6 +21,64 @@ namespace SystemZ {
const unsigned NumArgFPRs = 4;
extern const unsigned ArgFPRs[NumArgFPRs];
} // end namespace SystemZ
+
+class SystemZCCState : public CCState {
+private:
+ /// Records whether the value was a fixed argument.
+ /// See ISD::OutputArg::IsFixed.
+ SmallVector<bool, 4> ArgIsFixed;
+
+ /// Records whether the value was widened from a short vector type.
+ SmallVector<bool, 4> ArgIsShortVector;
+
+ // Check whether ArgVT is a short vector type.
+ bool IsShortVectorType(EVT ArgVT) {
+ return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
+ }
+
+public:
+ SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CCState(CC, isVarArg, MF, locs, C) {}
+
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ // Formal arguments are always fixed.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsFixed.push_back(true);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
+
+ CCState::AnalyzeFormalArguments(Ins, Fn);
+ }
+
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Record whether the call operand was a fixed argument.
+ ArgIsFixed.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsFixed.push_back(Outs[i].IsFixed);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
+
+ CCState::AnalyzeCallOperands(Outs, Fn);
+ }
+
+ // This version of AnalyzeCallOperands in the base class is not usable
+ // since we must provide a means of accessing ISD::OutputArg::IsFixed.
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) = delete;
+
+ bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
+ bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
+};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index fb0d1d8a3fe7e..be8f00b57adb5 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -12,6 +12,20 @@
class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
+
+// Match if this specific argument is a fixed (i.e. named) argument.
+class CCIfFixed<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
+
+// Match if this specific argument was widened from a short vector type.
+class CCIfShortVector<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
+
+
//===----------------------------------------------------------------------===//
// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
@@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[
// doesn't care about the ABI. All floating-point argument registers
// are call-clobbered, so we can use all of them here.
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+ // Similarly for vectors, with V24 being the ABI-compliant choice.
+ // Sub-128 vectors are returned in the same way, but they're widened
+ // to one of these types during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
// ABI-compliant code returns long double by reference, but that conversion
// is left to higher-level code. Perhaps we could add an f128 definition
@@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+ // are passed in the same way, but they're widened to one of these types
+ // during type legalization.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
+ V25, V27, V29, V31]>>>>,
+
+ // However, sub-128 vectors which need to go on the stack occupy just a
+ // single 8-byte-aligned 8-byte stack slot. Pass as i64.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfShortVector<CCBitConvertToType<i64>>>>,
+
+ // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 8>>>,
+
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 19cec219e2d1d..44ea1d25f08e3 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
unsigned SystemZConstantPoolValue::getRelocationInfo() const {
switch (Modifier) {
+ case SystemZCP::TLSGD:
+ case SystemZCP::TLSLDM:
+ case SystemZCP::DTPOFF:
+ // May require a dynamic relocation.
+ return 2;
case SystemZCP::NTPOFF:
// May require a relocation, but the relocations are always resolved
// by the static linker.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 0bd8c205ea4d0..e5f1bb18581ba 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -19,13 +19,17 @@ class GlobalValue;
namespace SystemZCP {
enum SystemZCPModifier {
+ TLSGD,
+ TLSLDM,
+ DTPOFF,
NTPOFF
};
} // end namespace SystemZCP
/// A SystemZ-specific constant pool value. At present, the only
-/// defined constant pool values are offsets of thread-local variables
-/// (written x@NTPOFF).
+/// defined constant pool values are module IDs or offsets of
+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF,
+/// or x@NTPOFF).
class SystemZConstantPoolValue : public MachineConstantPoolValue {
const GlobalValue *GV;
SystemZCP::SystemZCPModifier Modifier;
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index ce99ee5bc4127..16f9adc79f176 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -47,7 +47,7 @@ struct Reference {
return *this;
}
- LLVM_EXPLICIT operator bool() const { return Def || Use; }
+ explicit operator bool() const { return Def || Use; }
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index eff4ae3baf3f5..a636b35635ce3 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -309,8 +309,9 @@ static void emitIncrement(MachineBasicBlock &MBB,
}
}
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineFrameInfo *MFFrame = MF.getFrameInfo();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index cefa56fd74e5b..60bad894ee44d 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -40,7 +40,7 @@ public:
override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5f84624c38ea7..63992936813d7 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -127,12 +127,11 @@ struct RxSBGOperands {
};
class SystemZDAGToDAGISel : public SelectionDAGISel {
- const SystemZTargetLowering &Lowering;
- const SystemZSubtarget &Subtarget;
+ const SystemZSubtarget *Subtarget;
// Used by SystemZOperands.td to create integer constants.
inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
}
const SystemZTargetMachine &getTargetMachine() const {
@@ -140,7 +139,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
const SystemZInstrInfo *getInstrInfo() const {
- return getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ return Subtarget->getInstrInfo();
}
// Try to fold more of the base or index of AM into AM, where IsBase
@@ -256,6 +255,13 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
Addr, Base, Disp, Index);
}
+ // Try to match Addr as an address with a base, 12-bit displacement
+ // and index, where the index is element Elem of a vector.
+ // Return true on success, storing the base, displacement and vector
+ // in Base, Disp and Index respectively.
+ bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
+ SDValue &Disp, SDValue &Index) const;
+
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op. Return true if so and
// set Op to that Y.
@@ -293,6 +299,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Try to use gather instruction Opcode to implement vector insertion N.
+ SDNode *tryGather(SDNode *N, unsigned Opcode);
+
+ // Try to use scatter instruction Opcode to implement store Store.
+ SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
+
// Return true if Load and Store are loads and stores of the same size
// and are guaranteed not to overlap. Such operations can be implemented
// using block (SS-format) instructions.
@@ -315,9 +327,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) {}
+ : SelectionDAGISel(TM, OptLevel) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
// Override MachineFunctionPass.
const char *getPassName() const override {
@@ -326,7 +341,7 @@ public:
// Override SelectionDAGISel.
SDNode *Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -594,7 +609,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
}
// Lower the displacement to a TargetConstant.
- Disp = CurDAG->getTargetConstant(AM.Disp, VT);
+ Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT);
}
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
@@ -643,6 +658,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
return true;
}
+bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
+ SDValue &Base,
+ SDValue &Disp,
+ SDValue &Index) const {
+ SDValue Regs[2];
+ if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
+ Regs[0].getNode() && Regs[1].getNode()) {
+ for (unsigned int I = 0; I < 2; ++I) {
+ Base = Regs[I];
+ Index = Regs[1 - I];
+ // We can't tell here whether the index vector has the right type
+ // for the access; the caller needs to do that instead.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND)
+ Index = Index.getOperand(0);
+ if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Index.getOperand(1) == Elem) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
uint64_t InsertMask) const {
// We're only interested in cases where the insertion is into some operand
@@ -862,6 +901,7 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
}
SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
@@ -887,7 +927,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
// Force the new mask into the DAG, since it may include known-one bits.
auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode());
if (MaskN->getZExtValue() != RISBG.Mask) {
- SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT);
+ SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT);
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
return SelectCode(N);
}
@@ -896,22 +936,25 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
unsigned Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
EVT OpcodeVT = MVT::i64;
- if (VT == MVT::i32 && Subtarget.hasHighWord()) {
+ if (VT == MVT::i32 && Subtarget->hasHighWord()) {
Opcode = SystemZ::RISBMux;
OpcodeVT = MVT::i32;
RISBG.Start &= 31;
RISBG.End &= 31;
}
SDValue Ops[5] = {
- getUNDEF(SDLoc(N), OpcodeVT),
- convertTo(SDLoc(N), OpcodeVT, RISBG.Input),
- CurDAG->getTargetConstant(RISBG.Start, MVT::i32),
- CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32),
- CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32)
+ getUNDEF(DL, OpcodeVT),
+ convertTo(DL, OpcodeVT, RISBG.Input),
+ CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
+ CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops);
- return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+ N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
}
SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
@@ -943,19 +986,24 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
Opcode = SystemZ::RISBG;
-
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ }
+
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Ops[5] = {
- convertTo(SDLoc(N), MVT::i64, Op0),
- convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input),
- CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32),
- CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32),
- CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32)
+ convertTo(DL, MVT::i64, Op0),
+ convertTo(DL, MVT::i64, RxSBG[I].Input),
+ CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
+ CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
};
- N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops);
- return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
+ N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
}
SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
@@ -963,16 +1011,81 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
uint64_t LowerVal) {
EVT VT = Node->getValueType(0);
SDLoc DL(Node);
- SDValue Upper = CurDAG->getConstant(UpperVal, VT);
+ SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);
if (Op0.getNode())
Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
Upper = SDValue(Select(Upper.getNode()), 0);
- SDValue Lower = CurDAG->getConstant(LowerVal, VT);
+ SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);
SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
return Or.getNode();
}
+SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
+ SDValue ElemV = N->getOperand(2);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return 0;
+
+ unsigned Elem = ElemN->getZExtValue();
+ EVT VT = N->getValueType(0);
+ if (Elem >= VT.getVectorNumElements())
+ return 0;
+
+ auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!Load || !Load->hasOneUse())
+ return 0;
+ if (Load->getMemoryVT().getSizeInBits() !=
+ Load->getValueType(0).getSizeInBits())
+ return 0;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return 0;
+
+ SDLoc DL(Load);
+ SDValue Ops[] = {
+ N->getOperand(0), Base, Disp, Index,
+ CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
+ };
+ SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
+ ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
+ return Res;
+}
+
+SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
+ SDValue Value = Store->getValue();
+ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return 0;
+ if (Store->getMemoryVT().getSizeInBits() !=
+ Value.getValueType().getSizeInBits())
+ return 0;
+
+ SDValue ElemV = Value.getOperand(1);
+ auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
+ if (!ElemN)
+ return 0;
+
+ SDValue Vec = Value.getOperand(0);
+ EVT VT = Vec.getValueType();
+ unsigned Elem = ElemN->getZExtValue();
+ if (Elem >= VT.getVectorNumElements())
+ return 0;
+
+ SDValue Base, Disp, Index;
+ if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
+ Index.getValueType() != VT.changeVectorElementTypeToInteger())
+ return 0;
+
+ SDLoc DL(Store);
+ SDValue Ops[] = {
+ Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
+ Store->getChain()
+ };
+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+}
+
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
LoadSDNode *Load) const {
// Check that the two memory operands have the same size.
@@ -1102,13 +1215,33 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
uint64_t ConstCCMask =
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
// Invert the condition.
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask,
+ CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
CCMask.getValueType());
SDValue Op4 = Node->getOperand(4);
Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);
}
break;
}
+
+ case ISD::INSERT_VECTOR_ELT: {
+ EVT VT = Node->getValueType(0);
+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+ if (ElemBitSize == 32)
+ ResNode = tryGather(Node, SystemZ::VGEF);
+ else if (ElemBitSize == 64)
+ ResNode = tryGather(Node, SystemZ::VGEG);
+ break;
+ }
+
+ case ISD::STORE: {
+ auto *Store = cast<StoreSDNode>(Node);
+ unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
+ if (ElemBitSize == 32)
+ ResNode = tryScatter(Store, SystemZ::VSCEF);
+ else if (ElemBitSize == 64)
+ ResNode = tryScatter(Store, SystemZ::VSCEG);
+ break;
+ }
}
// Select the default instruction
@@ -1127,18 +1260,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
bool SystemZDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "Unexpected constraint code");
- // Accept addresses with short displacements, which are compatible
- // with Q, R, S and T. But keep the index operand for future expansion.
- SDValue Base, Disp, Index;
- if (!selectBDXAddr(SystemZAddressingMode::FormBD,
- SystemZAddressingMode::Disp12Only,
- Op, Base, Disp, Index))
- return true;
- OutOps.push_back(Base);
- OutOps.push_back(Disp);
- OutOps.push_back(Index);
- return false;
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_S:
+ case InlineAsm::Constraint_T:
+ // Accept addresses with short displacements, which are compatible
+ // with Q, R, S and T. But keep the index operand for future expansion.
+ SDValue Base, Disp, Index;
+ if (selectBDXAddr(SystemZAddressingMode::FormBD,
+ SystemZAddressingMode::Disp12Only,
+ Op, Base, Disp, Index)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+ break;
+ }
+ return true;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index f7ac1ca299109..24b5a41d7f675 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
- : TargetLowering(tm),
- Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
+ const SystemZSubtarget &STI)
+ : TargetLowering(tm), Subtarget(STI) {
MVT PtrVT = getPointerTy();
// Set up the register classes.
@@ -90,13 +91,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ }
addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+ if (Subtarget.hasVector()) {
+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
+ }
+
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
setExceptionPointerRegister(SystemZ::R6D);
@@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Instructions are strings of 2-byte aligned 2-byte values.
setMinFunctionAlignment(2);
@@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// Handle prefetches with PFD or PFDRL.
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ // Assume by default that all vector operations need to be expanded.
+ for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
+ if (getOperationAction(Opcode, VT) == Legal)
+ setOperationAction(Opcode, VT, Expand);
+
+ // Likewise all truncating stores and extending loads.
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+
+ if (isTypeLegal(VT)) {
+ // These operations are legal for anything that can be stored in a
+ // vector register, even if there is no native support for the format
+ // as such. In particular, we can do these for v4f32 even though there
+ // are no specific instructions for that format.
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+
+ // Likewise, except that we need to replace the nodes with something
+ // more specific.
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+ }
+
+ // Handle integer vector types.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ if (isTypeLegal(VT)) {
+ // These operations have direct equivalents.
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ if (VT != MVT::v2i64)
+ setOperationAction(ISD::MUL, VT, Legal);
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+
+ // Convert a GPR scalar to a vector by inserting it into element 0.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
+ // Use a series of unpacks for extensions.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
+ // Detect shifts by a scalar amount and convert them into
+ // V*_BY_SCALAR.
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+
+ // At present ROTL isn't matched by DAGCombiner. ROTR should be
+ // converted into ROTL.
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
+ // and inverting the result as necessary.
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ }
+
+ if (Subtarget.hasVector()) {
+ // There should be no need to check for float types other than v2f64
+ // since <2 x f32> isn't a legal type.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ }
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
}
}
+ // Handle floating-point vector types.
+ if (Subtarget.hasVector()) {
+ // Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ // Some insertions and extractions can be done directly but others
+ // need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ // These operations have direct equivalents.
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ }
+
// We have fused multiply-addition for f32 and f64 but not f128.
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
@@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// We have 64-bit FPR<->GPR moves, but need special handling for
// 32-bit forms.
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ if (!Subtarget.hasVector()) {
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ }
// VASTART and VACOPY need to deal with the SystemZ-specific varargs
// structure, but VAEND is a no-op.
@@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::FP_ROUND);
+
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
@@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // We can use CGFI or CLGFI.
+ return isInt<32>(Imm) || isUInt<32>(Imm);
+}
+
+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // We can use ALGFI or SLGFI.
+ return isUInt<32>(Imm) || isUInt<32>(-Imm);
+}
+
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
@@ -499,8 +652,10 @@ parseRegisterNumber(const std::string &Constraint,
return std::make_pair(0U, nullptr);
}
-std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
+std::pair<unsigned, const TargetRegisterClass *>
+SystemZTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
@@ -557,7 +712,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
SystemZMC::FP64Regs);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
void SystemZTargetLowering::
@@ -570,35 +725,35 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
case 'I': // Unsigned 8-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<8>(C->getZExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'J': // Unsigned 12-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isUInt<12>(C->getZExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'K': // Signed 16-bit constant
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<16>(C->getSExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'L': // Signed 20-bit displacement (on all targets we support)
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (isInt<20>(C->getSExtValue()))
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType()));
return;
case 'M': // 0x7fffffff
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (C->getZExtValue() == 0x7fffffff)
- Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType()));
return;
}
@@ -623,6 +778,24 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
return true;
}
+// We do not yet support 128-bit single-element vector types. If the user
+// attempts to use such types as function argument or return type, prefer
+// to error out instead of emitting code violating the ABI.
+static void VerifyVectorType(MVT VT, EVT ArgVT) {
+ if (ArgVT.isVector() && !VT.isVector())
+ report_fatal_error("Unsupported vector argument or return type");
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
+}
+
+static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
+}
+
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
@@ -643,7 +816,15 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
else if (VA.getLocInfo() == CCValAssign::Indirect)
Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
MachinePointerInfo(), false, false, false, 0);
- else
+ else if (VA.getLocInfo() == CCValAssign::BCvt) {
+ // If this is a short vector argument loaded from the stack,
+ // extend from i64 to full vector size and then bitcast.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
+ Value, DAG.getUNDEF(MVT::i64));
+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
+ } else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
return Value;
}
@@ -660,6 +841,14 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::BCvt:
+ // If this is a short vector argument to be stored to the stack,
+ // bitcast to v2i64 and then extract first element.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
case CCValAssign::Full:
return Value;
default:
@@ -676,13 +865,17 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
- MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL = static_cast<const SystemZFrameLowering *>(
- DAG.getSubtarget().getFrameLowering());
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ auto *TFL =
+ static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+
+ // Detect unsupported vector argument types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Ins);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -714,6 +907,14 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RC = &SystemZ::VR128BitRegClass;
+ break;
}
unsigned VReg = MRI.createVirtualRegister(RC);
@@ -732,7 +933,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
EVT PtrVT = getPointerTy();
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+ DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -818,9 +1020,15 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
+ // Detect unsupported vector argument and return types.
+ if (Subtarget.hasVector()) {
+ VerifyVectorTypes(Outs);
+ VerifyVectorTypes(Ins);
+ }
+
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -833,7 +1041,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Mark the start of the call.
if (!IsTailCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
DL);
// Copy argument values to their designated locations.
@@ -869,7 +1078,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, DL));
// Emit the store.
MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
@@ -917,9 +1126,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass[I].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -936,8 +1144,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Mark the end of the call, which is glued to the call itself.
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, PtrVT, true),
- DAG.getConstant(0, PtrVT, true),
+ DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true),
Glue, DL);
Glue = Chain.getValue(1);
@@ -972,6 +1180,10 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
SDLoc DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ // Detect unsupported vector return types.
+ if (Subtarget.hasVector())
+ VerifyVectorTypes(Outs);
+
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
@@ -1015,6 +1227,207 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only (other) argument. Provide the associated SystemZISD opcode and
+// the mask of valid CC values if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Return true if Op is an intrinsic node without chain that returns the
+// CC value as its final argument. Provide the associated SystemZISD
+// opcode and the mask of valid CC values if so.
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpkshs:
+ case Intrinsic::s390_vpksfs:
+ case Intrinsic::s390_vpksgs:
+ Opcode = SystemZISD::PACKS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vpklshs:
+ case Intrinsic::s390_vpklsfs:
+ case Intrinsic::s390_vpklsgs:
+ Opcode = SystemZISD::PACKLS_CC;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vceqbs:
+ case Intrinsic::s390_vceqhs:
+ case Intrinsic::s390_vceqfs:
+ case Intrinsic::s390_vceqgs:
+ Opcode = SystemZISD::VICMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchbs:
+ case Intrinsic::s390_vchhs:
+ case Intrinsic::s390_vchfs:
+ case Intrinsic::s390_vchgs:
+ Opcode = SystemZISD::VICMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vchlbs:
+ case Intrinsic::s390_vchlhs:
+ case Intrinsic::s390_vchlfs:
+ case Intrinsic::s390_vchlgs:
+ Opcode = SystemZISD::VICMPHLS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vtm:
+ Opcode = SystemZISD::VTM;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfaebs:
+ case Intrinsic::s390_vfaehs:
+ case Intrinsic::s390_vfaefs:
+ Opcode = SystemZISD::VFAE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfaezbs:
+ case Intrinsic::s390_vfaezhs:
+ case Intrinsic::s390_vfaezfs:
+ Opcode = SystemZISD::VFAEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeebs:
+ case Intrinsic::s390_vfeehs:
+ case Intrinsic::s390_vfeefs:
+ Opcode = SystemZISD::VFEE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfeezbs:
+ case Intrinsic::s390_vfeezhs:
+ case Intrinsic::s390_vfeezfs:
+ Opcode = SystemZISD::VFEEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenebs:
+ case Intrinsic::s390_vfenehs:
+ case Intrinsic::s390_vfenefs:
+ Opcode = SystemZISD::VFENE_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfenezbs:
+ case Intrinsic::s390_vfenezhs:
+ case Intrinsic::s390_vfenezfs:
+ Opcode = SystemZISD::VFENEZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vistrbs:
+ case Intrinsic::s390_vistrhs:
+ case Intrinsic::s390_vistrfs:
+ Opcode = SystemZISD::VISTR_CC;
+ CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
+ return true;
+
+ case Intrinsic::s390_vstrcbs:
+ case Intrinsic::s390_vstrchs:
+ case Intrinsic::s390_vstrcfs:
+ Opcode = SystemZISD::VSTRC_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vstrczbs:
+ case Intrinsic::s390_vstrczhs:
+ case Intrinsic::s390_vstrczfs:
+ Opcode = SystemZISD::VSTRCZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vfcedbs:
+ Opcode = SystemZISD::VFCMPES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchdbs:
+ Opcode = SystemZISD::VFCMPHS;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vfchedbs:
+ Opcode = SystemZISD::VFCMPHES;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ case Intrinsic::s390_vftcidb:
+ Opcode = SystemZISD::VFTCI;
+ CCValid = SystemZ::CCMASK_VCMP;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Emit an intrinsic with chain with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1);
+ SDValue NewChain = SDValue(Intr.getNode(), 0);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
+ return Intr;
+}
+
+// Emit an intrinsic with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ for (unsigned I = 1; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ if (Op->getNumValues() == 1)
+ return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
+ assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
+ SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
+ return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1112,7 +1525,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
-static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
+static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.ICmpType == SystemZICMP::UnsignedOnly)
return;
@@ -1126,13 +1539,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
(Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
(Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
- C.Op1 = DAG.getConstant(0, C.Op1.getValueType());
+ C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
}
}
// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
+static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// For us to make any changes, it must a comparison between a single-use
// load and a constant.
if (!C.Op0.hasOneUse() ||
@@ -1197,7 +1610,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
Value != ConstOp1->getZExtValue())
- C.Op1 = DAG.getConstant(Value, MVT::i32);
+ C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}
// Return true if Op is either an unextended load, or a load suitable
@@ -1293,7 +1706,7 @@ static unsigned reverseCCMask(unsigned CCMask) {
// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed. In that case it's better to compare the
// result of the subtraction against zero.
-static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
+static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
C.CCMask == SystemZ::CCMASK_CMP_NE) {
for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -1302,7 +1715,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
(N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
C.Op0 = SDValue(N, 0);
- C.Op1 = DAG.getConstant(0, N->getValueType(0));
+ C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
return;
}
}
@@ -1358,7 +1771,7 @@ static void adjustForLTGFR(Comparison &C) {
// If C compares the truncation of an extending load, try to compare
// the untruncated value instead. This exposes more opportunities to
// reuse CC.
-static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
+static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
if (C.Op0.getOpcode() == ISD::TRUNCATE &&
C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
C.Op1.getOpcode() == ISD::Constant &&
@@ -1370,7 +1783,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
(Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
C.Op0 = C.Op0.getOperand(0);
- C.Op1 = DAG.getConstant(0, C.Op0.getValueType());
+ C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
}
}
}
@@ -1489,7 +1902,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
+static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
// Check that we have a comparison with a constant.
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
if (!ConstOp1)
@@ -1529,6 +1942,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
MaskVal = -(CmpVal & -CmpVal);
NewC.ICmpType = SystemZICMP::UnsignedOnly;
}
+ if (!MaskVal)
+ return;
// Check whether the combination of mask, comparison value and comparison
// type are suitable.
@@ -1565,14 +1980,62 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
if (Mask && Mask->getZExtValue() == MaskVal)
C.Op1 = SDValue(Mask, 0);
else
- C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType());
+ C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
C.CCValid = SystemZ::CCMASK_TM;
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ C.CCMask &= CCValid;
+ return C;
+}
+
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode Cond) {
+ ISD::CondCode Cond, SDLoc DL) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
+ isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1596,11 +2059,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
else
C.ICmpType = SystemZICMP::SignedOnly;
C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
- adjustZeroCmp(DAG, C);
- adjustSubwordCmp(DAG, C);
- adjustForSubtraction(DAG, C);
+ adjustZeroCmp(DAG, DL, C);
+ adjustSubwordCmp(DAG, DL, C);
+ adjustForSubtraction(DAG, DL, C);
adjustForLTGFR(C);
- adjustICmpTruncate(DAG, C);
+ adjustICmpTruncate(DAG, DL, C);
}
if (shouldSwapCmpOperands(C)) {
@@ -1608,20 +2071,34 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
C.CCMask = reverseCCMask(C.CCMask);
}
- adjustForTestUnderMask(DAG, C);
+ adjustForTestUnderMask(DAG, DL, C);
return C;
}
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
- DAG.getConstant(C.ICmpType, MVT::i32));
+ DAG.getConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
- DAG.getConstant(RegisterOnly, MVT::i32));
+ DAG.getConstant(RegisterOnly, DL, MVT::i32));
}
return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
}
@@ -1635,7 +2112,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
- Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
+ DAG.getConstant(32, DL, MVT::i64));
Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
@@ -1667,46 +2145,175 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
if (Conversion.XORValue)
Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.XORValue, MVT::i32));
+ DAG.getConstant(Conversion.XORValue, DL, MVT::i32));
if (Conversion.AddValue)
Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.AddValue, MVT::i32));
+ DAG.getConstant(Conversion.AddValue, DL, MVT::i32));
// The SHR/AND sequence should get optimized to an RISBG.
Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
- DAG.getConstant(Conversion.Bit, MVT::i32));
+ DAG.getConstant(Conversion.Bit, DL, MVT::i32));
if (Conversion.Bit != 31)
Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
return Result;
}
+// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
+// be done directly. IsFP is true if CC is for a floating-point rather than
+// integer comparison.
+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+ switch (CC) {
+ case ISD::SETOEQ:
+ case ISD::SETEQ:
+ return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+
+ case ISD::SETUGT:
+ return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+
+ default:
+ return 0;
+ }
+}
+
+// Return the SystemZISD vector comparison operation for CC or its inverse,
+// or 0 if neither can be done directly. Indicate in Invert whether the
+// result is for the inverse of CC. IsFP is true if CC is for a
+// floating-point rather than integer comparison.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+ bool &Invert) {
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = false;
+ return Opcode;
+ }
+
+ CC = ISD::getSetCCInverse(CC, !IsFP);
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ Invert = true;
+ return Opcode;
+ }
+
+ return 0;
+}
+
+// Return a v2f64 that contains the extended form of elements Start and Start+1
+// of v4f32 value Op.
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
+ SDValue Op) {
+ int Mask[] = { Start, -1, Start + 1, -1 };
+ Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+ return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
+}
+
+// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
+// producing a result of type VT.
+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
+ EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
+ // There is no hardware support for v4f32, so extend the vector into
+ // two v2f64s and compare those.
+ if (CmpOp0.getValueType() == MVT::v4f32) {
+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+ SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
+ SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
+ return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+ }
+ return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+}
+
+// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
+// an integer mask of type VT.
+static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ ISD::CondCode CC, SDValue CmpOp0,
+ SDValue CmpOp1) {
+ bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+ bool Invert = false;
+ SDValue Cmp;
+ switch (CC) {
+ // Handle tests for order using (or (ogt y x) (oge x y)).
+ case ISD::SETUO:
+ Invert = true;
+ case ISD::SETO: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ break;
+ }
+
+ // Handle <> tests using (or (ogt y x) (ogt x y)).
+ case ISD::SETUEQ:
+ Invert = true;
+ case ISD::SETONE: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ break;
+ }
+
+ // Otherwise a single comparison is enough. It doesn't really
+ // matter whether we try the inversion or the swap first, since
+ // there are no cases where both work.
+ default:
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+ else {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+ else
+ llvm_unreachable("Unhandled comparison");
+ }
+ break;
+ }
+ if (Invert) {
+ SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(65535, DL, MVT::i32));
+ Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
+ }
+ return Cmp;
+}
+
SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
SelectionDAG &DAG) const {
SDValue CmpOp0 = Op.getOperand(0);
SDValue CmpOp1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue Glue = emitCmp(DAG, DL, C);
return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc DL(Op);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
- DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
+ Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
}
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@@ -1727,7 +2334,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
if (IsNegative)
Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
- DAG.getConstant(0, Op.getValueType()), Op);
+ DAG.getConstant(0, DL, Op.getValueType()), Op);
return Op;
}
@@ -1740,7 +2347,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDLoc DL(Op);
- Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
// Check for absolute and negative-absolute selections, including those
// where the comparison value is sign-extended (for LPGFR and LNGFR).
@@ -1775,18 +2382,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
if (!is32Bit(VT))
Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
// Sign-extend from the low bit.
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32);
+ SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
}
}
- SmallVector<SDValue, 5> Ops;
- Ops.push_back(TrueOp);
- Ops.push_back(FalseOp);
- Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32));
- Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32));
- Ops.push_back(Glue);
+ SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
+ DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
@@ -1826,11 +2429,58 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
- DAG.getConstant(Offset, PtrVT));
+ DAG.getConstant(Offset, DL, PtrVT));
return Result;
}
+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG,
+ unsigned Opcode,
+ SDValue GOTOffset) const {
+ SDLoc DL(Node);
+ EVT PtrVT = getPointerTy();
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Glue;
+
+ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
+ Glue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
+ Glue = Chain.getValue(1);
+
+ // The first call operand is the chain and the second is the TLS symbol.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
+ Node->getValueType(0),
+ 0, 0));
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
+ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Glue the call to the argument copies.
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
+ Glue = Chain.getValue(1);
+
+ // Copy the return value from %r2.
+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
+}
+
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const {
SDLoc DL(Node);
@@ -1838,33 +2488,94 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
EVT PtrVT = getPointerTy();
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
-
// The high part of the thread pointer is in access register 0.
SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
// The low part of the thread pointer is in access register 1.
SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, DL, MVT::i32));
TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
// Merge them into a single 64-bit address.
SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
- DAG.getConstant(32, PtrVT));
+ DAG.getConstant(32, DL, PtrVT));
SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
- // Get the offset of GA from the thread pointer.
- SystemZConstantPoolValue *CPV =
- SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+ // Get the offset of GA from the thread pointer, based on the TLS model.
+ SDValue Offset;
+ switch (model) {
+ case TLSModel::GeneralDynamic: {
+ // Load the GOT offset of the tls_index (module ID / per-symbol offset).
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
- // Force the offset into the constant pool and load it from there.
- SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
- SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- CPAddr, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ // Call __tls_get_offset to retrieve the offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
+ break;
+ }
+
+ case TLSModel::LocalDynamic: {
+ // Load the GOT offset of the module ID.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ // Call __tls_get_offset to retrieve the module base offset.
+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
+
+ // Note: The SystemZLDCleanupPass will remove redundant computations
+ // of the module base offset. Count total number of local-dynamic
+ // accesses to trigger execution of that pass.
+ SystemZMachineFunctionInfo* MFI =
+ DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Add the per-symbol offset.
+ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
+
+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
+ DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ DTPOffset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
+ break;
+ }
+
+ case TLSModel::InitialExec: {
+ // Load the offset from the GOT.
+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ SystemZII::MO_INDNTPOFF);
+ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
+ break;
+ }
+
+ case TLSModel::LocalExec: {
+ // Force the offset into the constant pool and load it from there.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+ Offset = DAG.getConstantPool(CPV, PtrVT, 8);
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ Offset, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+ break;
+ }
+ }
// Add the base and offset together.
return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
@@ -1916,6 +2627,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
EVT InVT = In.getValueType();
EVT ResVT = Op.getValueType();
+ // Convert loads directly. This is normally done by DAGCombiner,
+ // but we need this case for bitcasts that are created during lowering
+ // and which are then lowered themselves.
+ if (auto *LoadN = dyn_cast<LoadSDNode>(In))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
+
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
if (Subtarget.hasHighWord()) {
@@ -1926,22 +2644,22 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
} else {
In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
}
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
- return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
MVT::f64, SDValue(U64, 0), In);
SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
if (Subtarget.hasHighWord())
return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
MVT::i32, Out64);
SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
}
llvm_unreachable("Unexpected bitcast combination");
@@ -1962,8 +2680,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
// The initial values of each field.
const unsigned NumFields = 4;
SDValue Fields[NumFields] = {
- DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
- DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
};
@@ -1975,7 +2693,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
SDValue FieldAddr = Addr;
if (Offset != 0)
FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
- DAG.getIntPtrConstant(Offset));
+ DAG.getIntPtrConstant(Offset, DL));
MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
MachinePointerInfo(SV, Offset),
false, false, 0);
@@ -1993,8 +2711,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
- return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
/*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ /*isTailCall*/false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -2049,7 +2768,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
// multiplication:
//
// (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
- SDValue C63 = DAG.getConstant(63, MVT::i64);
+ SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
SDValue LL = Op.getOperand(0);
SDValue RL = Op.getOperand(1);
SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
@@ -2187,6 +2906,81 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ Op = Op.getOperand(0);
+
+ // Handle vector types via VPOPCT.
+ if (VT.isVector()) {
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
+ switch (VT.getVectorElementType().getSizeInBits()) {
+ case 8:
+ break;
+ case 16: {
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
+ SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
+ break;
+ }
+ case 32: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ case 64: {
+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(0, DL, MVT::i32));
+ Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+ return Op;
+ }
+
+ // Get the known-zero mask for the operand.
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ unsigned NumSignificantBits = (~KnownZero).getActiveBits();
+ if (NumSignificantBits == 0)
+ return DAG.getConstant(0, DL, VT);
+
+ // Skip known-zero high parts of the operand.
+ int64_t OrigBitSize = VT.getSizeInBits();
+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
+ BitSize = std::min(BitSize, OrigBitSize);
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // position larger than BitSize remain zero throughout.
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
+ if (BitSize != OrigBitSize)
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(BitSize - 8, DL, VT));
+
+ return Op;
+}
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2233,23 +3027,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
Opcode = SystemZISD::ATOMIC_LOADW_ADD;
- Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
+ Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
}
// Get the address of the containing word.
SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, PtrVT));
+ DAG.getConstant(-4, DL, PtrVT));
// Get the number of bits that the word must be rotated left in order
// to bring the field to the top bits of a GR32.
SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, PtrVT));
+ DAG.getConstant(3, DL, PtrVT));
BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
// Get the complementing shift amount, for rotating a field in the top
// bits back to its proper position.
SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, WideVT), BitShift);
+ DAG.getConstant(0, DL, WideVT), BitShift);
// Extend the source operand to 32 bits and prepare it for the inner loop.
// ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
@@ -2258,23 +3052,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
// bits must be set, while for other opcodes they should be left clear.
if (Opcode != SystemZISD::ATOMIC_SWAPW)
Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
- DAG.getConstant(32 - BitSize, WideVT));
+ DAG.getConstant(32 - BitSize, DL, WideVT));
if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
Opcode == SystemZISD::ATOMIC_LOADW_NAND)
Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
- DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));
+ DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
// Construct the ATOMIC_LOADW_* node.
SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
- DAG.getConstant(BitSize, WideVT) };
+ DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
NarrowVT, MMO);
// Rotate the result of the final CS so that the field is in the lower
// bits of a GR32, then truncate it.
SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
- DAG.getConstant(BitSize, WideVT));
+ DAG.getConstant(BitSize, DL, WideVT));
SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
@@ -2300,10 +3094,10 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
// available or the negative value is in the range of A(G)FHI.
int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
- NegSrc2 = DAG.getConstant(Value, MemVT);
+ NegSrc2 = DAG.getConstant(Value, DL, MemVT);
} else if (Subtarget.hasInterlockedAccess1())
// Use LAA(G) if available.
- NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
+ NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
Src2);
if (NegSrc2.getNode())
@@ -2342,23 +3136,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
// Get the address of the containing word.
SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
- DAG.getConstant(-4, PtrVT));
+ DAG.getConstant(-4, DL, PtrVT));
// Get the number of bits that the word must be rotated left in order
// to bring the field to the top bits of a GR32.
SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
- DAG.getConstant(3, PtrVT));
+ DAG.getConstant(3, DL, PtrVT));
BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
// Get the complementing shift amount, for rotating a field in the top
// bits back to its proper position.
SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
- DAG.getConstant(0, WideVT), BitShift);
+ DAG.getConstant(0, DL, WideVT), BitShift);
// Construct the ATOMIC_CMP_SWAPW node.
SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
- NegBitShift, DAG.getConstant(BitSize, WideVT) };
+ NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
VTList, Ops, NarrowVT, MMO);
return AtomicOp;
@@ -2387,19 +3181,1084 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
// Just preserve the chain.
return Op.getOperand(0);
+ SDLoc DL(Op);
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Ops[] = {
Op.getOperand(0),
- DAG.getConstant(Code, MVT::i32),
+ DAG.getConstant(Code, DL, MVT::i32),
Op.getOperand(1)
};
- return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final operand must be MVT::Glue.
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDLoc DL(After);
+ SDValue Glue = SDValue(After, After->getNumValues() - 1);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
+ return SDValue();
+ }
+
+ return SDValue();
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
+ SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
+ SDValue CC = getCCResult(DAG, Glued.getNode());
+ if (Op->getNumValues() == 1)
+ return CC;
+ assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
+ Glued, CC);
+ }
+
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (Id) {
+ case Intrinsic::s390_vpdi:
+ return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vperm:
+ return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::s390_vuphb:
+ case Intrinsic::s390_vuphh:
+ case Intrinsic::s390_vuphf:
+ return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplhb:
+ case Intrinsic::s390_vuplhh:
+ case Intrinsic::s390_vuplhf:
+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vuplb:
+ case Intrinsic::s390_vuplhw:
+ case Intrinsic::s390_vuplf:
+ return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vupllb:
+ case Intrinsic::s390_vupllh:
+ case Intrinsic::s390_vupllf:
+ return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1));
+
+ case Intrinsic::s390_vsumb:
+ case Intrinsic::s390_vsumh:
+ case Intrinsic::s390_vsumgh:
+ case Intrinsic::s390_vsumgf:
+ case Intrinsic::s390_vsumqf:
+ case Intrinsic::s390_vsumqg:
+ return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ return SDValue();
+}
+
+namespace {
+// Says that SystemZISD operation Opcode can be used to perform the equivalent
+// of a VPERM with permute vector Bytes. If Opcode takes three operands,
+// Operand is the constant third operand, otherwise it is the number of
+// bytes in each element of the result.
+struct Permute {
+ unsigned Opcode;
+ unsigned Operand;
+ unsigned char Bytes[SystemZ::VectorBytes];
+};
+}
+
+static const Permute PermuteForms[] = {
+ // VMRHG
+ { SystemZISD::MERGE_HIGH, 8,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VMRHF
+ { SystemZISD::MERGE_HIGH, 4,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
+ // VMRHH
+ { SystemZISD::MERGE_HIGH, 2,
+ { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
+ // VMRHB
+ { SystemZISD::MERGE_HIGH, 1,
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
+ // VMRLG
+ { SystemZISD::MERGE_LOW, 8,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
+ // VMRLF
+ { SystemZISD::MERGE_LOW, 4,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ // VMRLH
+ { SystemZISD::MERGE_LOW, 2,
+ { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
+ // VMRLB
+ { SystemZISD::MERGE_LOW, 1,
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
+ // VPKG
+ { SystemZISD::PACK, 4,
+ { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
+ // VPKF
+ { SystemZISD::PACK, 2,
+ { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
+ // VPKH
+ { SystemZISD::PACK, 1,
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
+ // VPDI V1, V2, 4 (low half of V1, high half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 4,
+ { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
+ // VPDI V1, V2, 1 (high half of V1, low half of V2)
+ { SystemZISD::PERMUTE_DWORDS, 1,
+ { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
+};
+
+// Called after matching a vector shuffle against a particular pattern.
+// Both the original shuffle and the pattern have two vector operands.
+// OpNos[0] is the operand of the original shuffle that should be used for
+// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
+// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
+// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
+// for operands 0 and 1 of the pattern.
+static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
+ if (OpNos[0] < 0) {
+ if (OpNos[1] < 0)
+ return false;
+ OpNo0 = OpNo1 = OpNos[1];
+ } else if (OpNos[1] < 0) {
+ OpNo0 = OpNo1 = OpNos[0];
+ } else {
+ OpNo0 = OpNos[0];
+ OpNo1 = OpNos[1];
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if the VPERM can be implemented using P.
+// When returning true set OpNo0 to the VPERM operand that should be
+// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
+//
+// For example, if swapping the VPERM operands allows P to match, OpNo0
+// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
+// operand, but rewriting it to use two duplicated operands allows it to
+// match P, then OpNo0 and OpNo1 will be the same.
+static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
+ int Elt = Bytes[I];
+ if (Elt >= 0) {
+ // Make sure that the two permute vectors use the same suboperand
+ // byte number. Only the operand numbers (the high bits) are
+ // allowed to differ.
+ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
+ return false;
+ int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &OpNo0, unsigned &OpNo1) {
+ for (auto &P : PermuteForms)
+ if (matchPermute(Bytes, P, OpNo0, OpNo1))
+ return &P;
+ return nullptr;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. This permute is an operand of an outer permute.
+// See whether redistributing the -1 bytes gives a shuffle that can be
+// implemented using P. If so, set Transform to a VPERM-like permute vector
+// that, when applied to the result of P, gives the original permute in Bytes.
+static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ const Permute &P,
+ SmallVectorImpl<int> &Transform) {
+ unsigned To = 0;
+ for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
+ int Elt = Bytes[From];
+ if (Elt < 0)
+ // Byte number From of the result is undefined.
+ Transform[From] = -1;
+ else {
+ while (P.Bytes[To] != Elt) {
+ To += 1;
+ if (To == SystemZ::VectorBytes)
+ return false;
+ }
+ Transform[From] = To;
+ }
+ }
+ return true;
+}
+
+// As above, but search for a matching permute.
+static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
+ SmallVectorImpl<int> &Transform) {
+ for (auto &P : PermuteForms)
+ if (matchDoublePermute(Bytes, P, Transform))
+ return &P;
+ return nullptr;
+}
+
+// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
+// as if it had type vNi8.
+static void getVPermMask(ShuffleVectorSDNode *VSN,
+ SmallVectorImpl<int> &Bytes) {
+ EVT VT = VSN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ Bytes.resize(NumElements * BytesPerElement, -1);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Index = VSN->getMaskElt(I);
+ if (Index >= 0)
+ for (unsigned J = 0; J < BytesPerElement; ++J)
+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
+ }
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
+// the result come from a contiguous sequence of bytes from one input.
+// Set Base to the selector for the first byte if so.
+static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
+ unsigned BytesPerElement, int &Base) {
+ Base = -1;
+ for (unsigned I = 0; I < BytesPerElement; ++I) {
+ if (Bytes[Start + I] >= 0) {
+ unsigned Elem = Bytes[Start + I];
+ if (Base < 0) {
+ Base = Elem - I;
+ // Make sure the bytes would come from one input operand.
+ if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
+ return false;
+ } else if (unsigned(Base) != Elem - I)
+ return false;
+ }
+ }
+ return true;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Return true if it can be performed using VSLDI.
+// When returning true, set StartIndex to the shift amount and OpNo0
+// and OpNo1 to the VPERM operands that should be used as the first
+// and second shift operand respectively.
+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
+ unsigned &StartIndex, unsigned &OpNo0,
+ unsigned &OpNo1) {
+ int OpNos[] = { -1, -1 };
+ int Shift = -1;
+ for (unsigned I = 0; I < 16; ++I) {
+ int Index = Bytes[I];
+ if (Index >= 0) {
+ int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
+ int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
+ int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
+ if (Shift < 0)
+ Shift = ExpectedShift;
+ else if (Shift != ExpectedShift)
+ return false;
+ // Make sure that the operand mappings are consistent with previous
+ // elements.
+ if (OpNos[ModelOpNo] == 1 - RealOpNo)
+ return false;
+ OpNos[ModelOpNo] = RealOpNo;
+ }
+ }
+ StartIndex = Shift;
+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
+}
+
+// Create a node that performs P on operands Op0 and Op1, casting the
+// operands to the appropriate type. The type of the result is determined by P.
+static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
+ const Permute &P, SDValue Op0, SDValue Op1) {
+ // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
+ // elements of a PACK are twice as wide as the outputs.
+ unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
+ P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
+ P.Operand);
+ // Cast both operands to the appropriate type.
+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
+ SystemZ::VectorBytes / InBytes);
+ Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
+ SDValue Op;
+ if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
+ SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
+ Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
+ } else if (P.Opcode == SystemZISD::PACK) {
+ MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
+ SystemZ::VectorBytes / P.Operand);
+ Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
+ } else {
+ Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
+ }
+ return Op;
+}
+
+// Bytes is a VPERM-like permute vector, except that -1 is used for
+// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
+// VSLDI or VPERM.
+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
+ const SmallVectorImpl<int> &Bytes) {
+ for (unsigned I = 0; I < 2; ++I)
+ Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
+
+ // First see whether VSLDI can be used.
+ unsigned StartIndex, OpNo0, OpNo1;
+ if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
+ return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
+ Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
+
+ // Fall back on VPERM. Construct an SDNode for the permute vector.
+ SDValue IndexNodes[SystemZ::VectorBytes];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= 0)
+ IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
+ else
+ IndexNodes[I] = DAG.getUNDEF(MVT::i32);
+ SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
+}
+
+namespace {
+// Describes a general N-operand vector shuffle.
+struct GeneralShuffle {
+ GeneralShuffle(EVT vt) : VT(vt) {}
+ void addUndef();
+ void add(SDValue, unsigned);
+ SDValue getNode(SelectionDAG &, SDLoc);
+
+ // The operands of the shuffle.
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops;
+
+ // Index I is -1 if byte I of the result is undefined. Otherwise the
+ // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
+ // Bytes[I] / SystemZ::VectorBytes.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+
+ // The type of the shuffle result.
+ EVT VT;
+};
+}
+
+// Add an extra undefined element to the shuffle.
+void GeneralShuffle::addUndef() {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(-1);
+}
+
+// Add an extra element to the shuffle, taking it from element Elem of Op.
+// A null Op indicates a vector input whose value will be calculated later;
+// there is at most one such input per shuffle and it always has the same
+// type as the result.
+void GeneralShuffle::add(SDValue Op, unsigned Elem) {
+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
+
+ // The source vector can have wider elements than the result,
+ // either through an explicit TRUNCATE or because of type legalization.
+ // We want the least significant part.
+ EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
+ unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
+ assert(FromBytesPerElement >= BytesPerElement &&
+ "Invalid EXTRACT_VECTOR_ELT");
+ unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
+ (FromBytesPerElement - BytesPerElement));
+
+ // Look through things like shuffles and bitcasts.
+ while (Op.getNode()) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
+ // See whether the bytes we need come from a contiguous part of one
+ // operand.
+ SmallVector<int, SystemZ::VectorBytes> OpBytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
+ int NewByte;
+ if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
+ break;
+ if (NewByte < 0) {
+ addUndef();
+ return;
+ }
+ Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
+ Byte = unsigned(NewByte) % SystemZ::VectorBytes;
+ } else if (Op.getOpcode() == ISD::UNDEF) {
+ addUndef();
+ return;
+ } else
+ break;
+ }
+
+ // Make sure that the source of the extraction is in Ops.
+ unsigned OpNo = 0;
+ for (; OpNo < Ops.size(); ++OpNo)
+ if (Ops[OpNo] == Op)
+ break;
+ if (OpNo == Ops.size())
+ Ops.push_back(Op);
+
+ // Add the element to Bytes.
+ unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
+ for (unsigned I = 0; I < BytesPerElement; ++I)
+ Bytes.push_back(Base + I);
+}
+
+// Return SDNodes for the completed shuffle.
+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
+ assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
+
+ if (Ops.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Make sure that there are at least two shuffle operands.
+ if (Ops.size() == 1)
+ Ops.push_back(DAG.getUNDEF(MVT::v16i8));
+
+ // Create a tree of shuffles, deferring root node until after the loop.
+ // Try to redistribute the undefined elements of non-root nodes so that
+ // the non-root shuffles match something like a pack or merge, then adjust
+ // the parent node's permute vector to compensate for the new order.
+ // Among other things, this copes with vectors like <2 x i16> that were
+ // padded with undefined elements during type legalization.
+ //
+ // In the best case this redistribution will lead to the whole tree
+ // using packs and merges. It should rarely be a loss in other cases.
+ unsigned Stride = 1;
+ for (; Stride * 2 < Ops.size(); Stride *= 2) {
+ for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
+ SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
+
+ // Create a mask for just these two operands.
+ SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
+ unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
+ if (OpNo == I)
+ NewBytes[J] = Byte;
+ else if (OpNo == I + Stride)
+ NewBytes[J] = SystemZ::VectorBytes + Byte;
+ else
+ NewBytes[J] = -1;
+ }
+ // See if it would be better to reorganize NewMask to avoid using VPERM.
+ SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
+ if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
+ Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
+ // Applying NewBytesMap to Ops[I] gets back to NewBytes.
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
+ if (NewBytes[J] >= 0) {
+ assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
+ "Invalid double permute");
+ Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
+ } else
+ assert(NewBytesMap[J] < 0 && "Invalid double permute");
+ }
+ } else {
+ // Just use NewBytes on the operands.
+ Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
+ if (NewBytes[J] >= 0)
+ Bytes[J] = I * SystemZ::VectorBytes + J;
+ }
+ }
+ }
+
+ // Now we just have 2 inputs. Put the second operand in Ops[1].
+ if (Stride > 1) {
+ Ops[1] = Ops[Stride];
+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+ if (Bytes[I] >= int(SystemZ::VectorBytes))
+ Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
+ }
+
+ // Look for an instruction that can do the permute without resorting
+ // to VPERM.
+ unsigned OpNo0, OpNo1;
+ SDValue Op;
+ if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+ Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
+ else
+ Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
+static bool isScalarToVector(SDValue Op) {
+ for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
+ if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
+ return false;
+ return true;
+}
+
+// Return a vector of type VT that contains Value in the first element.
+// The other elements don't matter.
+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SDValue Value) {
+ // If we have a constant, replicate it to all elements and let the
+ // BUILD_VECTOR lowering take care of it.
+ if (Value.getOpcode() == ISD::Constant ||
+ Value.getOpcode() == ISD::ConstantFP) {
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ }
+ if (Value.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+}
+
+// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
+// element 1. Used for cases in which replication is cheap.
+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SDValue Op0, SDValue Op1) {
+ if (Op0.getOpcode() == ISD::UNDEF) {
+ if (Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
+ }
+ if (Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
+ return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
+ buildScalarToVector(DAG, DL, VT, Op0),
+ buildScalarToVector(DAG, DL, VT, Op1));
+}
+
+// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
+// vector for them.
+static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
+ SDValue Op1) {
+ if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(MVT::v2i64);
+ // If one of the two inputs is undefined then replicate the other one,
+ // in order to avoid using another register unnecessarily.
+ if (Op0.getOpcode() == ISD::UNDEF)
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ else if (Op1.getOpcode() == ISD::UNDEF)
+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ else {
+ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
+ }
+ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
+}
+
+// Try to represent constant BUILD_VECTOR node BVN using a
+// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
+// on success.
+static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+ EVT ElemVT = BVN->getValueType(0).getVectorElementType();
+ unsigned BytesPerElement = ElemVT.getStoreSize();
+ for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (Op.getOpcode() != ISD::UNDEF) {
+ uint64_t Value;
+ if (Op.getOpcode() == ISD::Constant)
+ Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
+ else if (Op.getOpcode() == ISD::ConstantFP)
+ Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
+ .getZExtValue());
+ else
+ return false;
+ for (unsigned J = 0; J < BytesPerElement; ++J) {
+ uint64_t Byte = (Value >> (J * 8)) & 0xff;
+ if (Byte == 0xff)
+ Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
+ else if (Byte != 0)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Try to load a vector constant in which BitsPerElement-bit value Value
+// is replicated to fill the vector. VT is the type of the resulting
+// constant, which may have elements of a different size from BitsPerElement.
+// Return the SDValue of the constant on success, otherwise return
+// an empty value.
+static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
+ const SystemZInstrInfo *TII,
+ SDLoc DL, EVT VT, uint64_t Value,
+ unsigned BitsPerElement) {
+ // Signed 16-bit values can be replicated using VREPI.
+ int64_t SignedValue = SignExtend64(Value, BitsPerElement);
+ if (isInt<16>(SignedValue)) {
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
+ DAG.getConstant(SignedValue, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ // See whether rotating the constant left some N places gives a value that
+ // is one less than a power of 2 (i.e. all zeros followed by all ones).
+ // If so we can use VGM.
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value,
+ // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
+ // bit numbers for an BitsPerElement value, so that 0 denotes
+ // 1 << (BitsPerElement-1).
+ Start -= 64 - BitsPerElement;
+ End -= 64 - BitsPerElement;
+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
+ SystemZ::VectorBits / BitsPerElement);
+ SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
+ DAG.getConstant(Start, DL, MVT::i32),
+ DAG.getConstant(End, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ return SDValue();
+}
+
+// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
+// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
+// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
+// would benefit from this representation and return it if so.
+static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
+ BuildVectorSDNode *BVN) {
+ EVT VT = BVN->getValueType(0);
+ unsigned NumElements = VT.getVectorNumElements();
+
+ // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
+ // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
+ // need a BUILD_VECTOR, add an additional placeholder operand for that
+ // BUILD_VECTOR and store its operands in ResidueOps.
+ GeneralShuffle GS(VT);
+ SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
+ bool FoundOne = false;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Op = BVN->getOperand(I);
+ if (Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op.getOperand(1).getOpcode() == ISD::Constant) {
+ unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ GS.add(Op.getOperand(0), Elem);
+ FoundOne = true;
+ } else if (Op.getOpcode() == ISD::UNDEF) {
+ GS.addUndef();
+ } else {
+ GS.add(SDValue(), ResidueOps.size());
+ ResidueOps.push_back(Op);
+ }
+ }
+
+ // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
+ if (!FoundOne)
+ return SDValue();
+
+ // Create the BUILD_VECTOR for the remaining elements, if any.
+ if (!ResidueOps.empty()) {
+ while (ResidueOps.size() < NumElements)
+ ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
+ for (auto &Op : GS.Ops) {
+ if (!Op.getNode()) {
+ Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
+ break;
+ }
+ }
+ }
+ return GS.getNode(DAG, SDLoc(BVN));
+}
+
+// Combine GPR scalar values Elems into a vector of type VT.
+static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) {
+ // See whether there is a single replicated value.
+ SDValue Single;
+ unsigned int NumElements = Elems.size();
+ unsigned int Count = 0;
+ for (auto Elem : Elems) {
+ if (Elem.getOpcode() != ISD::UNDEF) {
+ if (!Single.getNode())
+ Single = Elem;
+ else if (Elem != Single) {
+ Single = SDValue();
+ break;
+ }
+ Count += 1;
+ }
+ }
+ // There are three cases here:
+ //
+ // - if the only defined element is a loaded one, the best sequence
+ // is a replicating load.
+ //
+ // - otherwise, if the only defined element is an i64 value, we will
+ // end up with the same VLVGP sequence regardless of whether we short-cut
+ // for replication or fall through to the later code.
+ //
+ // - otherwise, if the only defined element is an i32 or smaller value,
+ // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
+ // This is only a win if the single defined element is used more than once.
+ // In other cases we're better off using a single VLVGx.
+ if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
+
+ // The best way of building a v2i64 from two i64s is to use VLVGP.
+ if (VT == MVT::v2i64)
+ return joinDwords(DAG, DL, Elems[0], Elems[1]);
+
+ // Use a 64-bit merge high to combine two doubles.
+ if (VT == MVT::v2f64)
+ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+
+ // Build v4f32 values directly from the FPRs:
+ //
+ // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
+ // V V VMRHF
+ // <ABxx> <CDxx>
+ // V VMRHG
+ // <ABCD>
+ if (VT == MVT::v4f32) {
+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
+ // Avoid unnecessary undefs by reusing the other operand.
+ if (Op01.getOpcode() == ISD::UNDEF)
+ Op01 = Op23;
+ else if (Op23.getOpcode() == ISD::UNDEF)
+ Op23 = Op01;
+ // Merging identical replications is a no-op.
+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
+ return Op01;
+ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
+ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
+ DL, MVT::v2i64, Op01, Op23);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Collect the constant terms.
+ SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
+ SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
+
+ unsigned NumConstants = 0;
+ for (unsigned I = 0; I < NumElements; ++I) {
+ SDValue Elem = Elems[I];
+ if (Elem.getOpcode() == ISD::Constant ||
+ Elem.getOpcode() == ISD::ConstantFP) {
+ NumConstants += 1;
+ Constants[I] = Elem;
+ Done[I] = true;
+ }
+ }
+ // If there was at least one constant, fill in the other elements of
+ // Constants with undefs to get a full vector constant and use that
+ // as the starting point.
+ SDValue Result;
+ if (NumConstants > 0) {
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Constants[I].getNode())
+ Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
+ } else {
+ // Otherwise try to use VLVGP to start the sequence in order to
+ // avoid a false dependency on any previous contents of the vector
+ // register. This only makes sense if one of the associated elements
+ // is defined.
+ unsigned I1 = NumElements / 2 - 1;
+ unsigned I2 = NumElements - 1;
+ bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
+ bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
+ if (Def1 || Def2) {
+ SDValue Elem1 = Elems[Def1 ? I1 : I2];
+ SDValue Elem2 = Elems[Def2 ? I2 : I1];
+ Result = DAG.getNode(ISD::BITCAST, DL, VT,
+ joinDwords(DAG, DL, Elem1, Elem2));
+ Done[I1] = true;
+ Done[I2] = true;
+ } else
+ Result = DAG.getUNDEF(VT);
+ }
+
+ // Use VLVGx to insert the other elements.
+ for (unsigned I = 0; I < NumElements; ++I)
+ if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
+ DAG.getConstant(I, DL, MVT::i32));
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (BVN->isConstant()) {
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ uint64_t Mask = 0;
+ if (tryBuildVectorByteMask(BVN, Mask)) {
+ SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
+ DAG.getConstant(Mask, DL, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+
+ // Try using some form of replication.
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, true) &&
+ SplatBitSize <= 64) {
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower = (SplatUndefZ
+ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper = (SplatUndefZ
+ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ uint64_t Value = SplatBitsZ | Upper | Lower;
+ SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
+ SplatBitSize);
+ if (Op.getNode())
+ return Op;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ Value = SplatBitsZ | Middle;
+ Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
+ if (Op.getNode())
+ return Op;
+ }
+
+ // Fall back to loading it from memory.
+ return SDValue();
+ }
+
+ // See if we should use shuffles to construct the vector from other vectors.
+ SDValue Res = tryBuildVectorShuffle(DAG, BVN);
+ if (Res.getNode())
+ return Res;
+
+ // Detect SCALAR_TO_VECTOR conversions.
+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
+ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
+
+ // Otherwise use buildVector to build the vector up from GPRs.
+ unsigned NumElements = Op.getNumOperands();
+ SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ Ops[I] = Op.getOperand(I);
+ return buildVector(DAG, DL, VT, Ops);
+}
+
+SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElements = VT.getVectorNumElements();
+
+ if (VSN->isSplat()) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ // See whether the value we're splatting is directly available as a scalar.
+ if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ Op0.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
+ // Otherwise keep it as a vector-to-vector operation.
+ return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+
+ GeneralShuffle GS(VT);
+ for (unsigned I = 0; I < NumElements; ++I) {
+ int Elt = VSN->getMaskElt(I);
+ if (Elt < 0)
+ GS.addUndef();
+ else
+ GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements);
+ }
+ return GS.getNode(DAG, SDLoc(VSN));
+}
+
+SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ // Just insert the scalar into element 0 of an undefined vector.
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
+ Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
+ Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
+}
+
+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle insertions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ EVT VT = Op.getValueType();
+
+ // Insertions into constant indices of a v2f64 can be done using VPDI.
+ // However, if the inserted value is a bitcast or a constant then it's
+ // better to use GPRs, as below.
+ if (VT == MVT::v2f64 &&
+ Op1.getOpcode() != ISD::BITCAST &&
+ Op1.getOpcode() != ISD::ConstantFP &&
+ Op2.getOpcode() == ISD::Constant) {
+ uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
+ unsigned Mask = VT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and insert via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
+ DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Handle extractions of floating-point values.
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ EVT VecVT = Op0.getValueType();
+
+ // Extractions of constant indices can be done directly.
+ if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
+ uint64_t Index = CIndexN->getZExtValue();
+ unsigned Mask = VecVT.getVectorNumElements() - 1;
+ if (Index <= Mask)
+ return Op;
+ }
+
+ // Otherwise bitcast to the equivalent integer form and extract via a GPR.
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+SDValue
+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const {
+ SDValue PackedOp = Op.getOperand(0);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
+ unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
+ do {
+ FromBits *= 2;
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
+ SystemZ::VectorBits / FromBits);
+ PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+ } while (FromBits != ToBits);
+ return PackedOp;
+}
+
+SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
+ unsigned ByScalar) const {
+ // Look for cases where a vector shift can use the *_BY_SCALAR form.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+
+ // See whether the shift vector is a splat represented as BUILD_VECTOR.
+ if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ // Check for constant splats. Use ElemBitSize as the minimum element
+ // width and reject splats that need wider elements.
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElemBitSize, true) &&
+ SplatBitSize == ElemBitSize) {
+ SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
+ DL, MVT::i32);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ // Check for variable splats.
+ BitVector UndefElements;
+ SDValue Splat = BVN->getSplatValue(&UndefElements);
+ if (Splat) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+
+ // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
+ // and the shift amount is directly available in a GPR.
+ if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
+ if (VSN->isSplat()) {
+ SDValue VSNOp0 = VSN->getOperand(0);
+ unsigned Index = VSN->getSplatIndex();
+ assert(Index < VT.getVectorNumElements() &&
+ "Splat index should be defined and in first operand");
+ if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
+ VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
+ // Since i32 is the smallest legal type, we either need a no-op
+ // or a truncation.
+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
+ VSNOp0.getOperand(Index));
+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
+ }
+ }
+ }
+
+ // Otherwise just treat the current form as legal.
+ return Op;
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2437,6 +4296,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF:
+ return DAG.getNode(ISD::CTLZ, SDLoc(Op),
+ Op.getValueType(), Op.getOperand(0));
+ case ISD::CTTZ_ZERO_UNDEF:
+ return DAG.getNode(ISD::CTTZ, SDLoc(Op),
+ Op.getValueType(), Op.getOperand(0));
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
@@ -2471,6 +4338,30 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+ case ISD::SHL:
+ return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
+ case ISD::SRL:
+ return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
+ case ISD::SRA:
+ return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2478,10 +4369,13 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
- switch (Opcode) {
+ switch ((SystemZISD::NodeType)Opcode) {
+ case SystemZISD::FIRST_NUMBER: break;
OPCODE(RET_FLAG);
OPCODE(CALL);
OPCODE(SIBCALL);
+ OPCODE(TLS_GDCALL);
+ OPCODE(TLS_LDCALL);
OPCODE(PCREL_WRAPPER);
OPCODE(PCREL_OFFSET);
OPCODE(IABS);
@@ -2492,7 +4386,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
OPCODE(EXTRACT_ACCESS);
+ OPCODE(POPCNT);
OPCODE(UMUL_LOHI64);
+ OPCODE(SDIVREM32);
OPCODE(SDIVREM64);
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
@@ -2506,11 +4402,60 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(XC_LOOP);
OPCODE(CLC);
OPCODE(CLC_LOOP);
- OPCODE(STRCMP);
OPCODE(STPCPY);
+ OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
+ OPCODE(BYTE_MASK);
+ OPCODE(ROTATE_MASK);
+ OPCODE(REPLICATE);
+ OPCODE(JOIN_DWORDS);
+ OPCODE(SPLAT);
+ OPCODE(MERGE_HIGH);
+ OPCODE(MERGE_LOW);
+ OPCODE(SHL_DOUBLE);
+ OPCODE(PERMUTE_DWORDS);
+ OPCODE(PERMUTE);
+ OPCODE(PACK);
+ OPCODE(PACKS_CC);
+ OPCODE(PACKLS_CC);
+ OPCODE(UNPACK_HIGH);
+ OPCODE(UNPACKL_HIGH);
+ OPCODE(UNPACK_LOW);
+ OPCODE(UNPACKL_LOW);
+ OPCODE(VSHL_BY_SCALAR);
+ OPCODE(VSRL_BY_SCALAR);
+ OPCODE(VSRA_BY_SCALAR);
+ OPCODE(VSUM);
+ OPCODE(VICMPE);
+ OPCODE(VICMPH);
+ OPCODE(VICMPHL);
+ OPCODE(VICMPES);
+ OPCODE(VICMPHS);
+ OPCODE(VICMPHLS);
+ OPCODE(VFCMPE);
+ OPCODE(VFCMPH);
+ OPCODE(VFCMPHE);
+ OPCODE(VFCMPES);
+ OPCODE(VFCMPHS);
+ OPCODE(VFCMPHES);
+ OPCODE(VFTCI);
+ OPCODE(VEXTEND);
+ OPCODE(VROUND);
+ OPCODE(VTM);
+ OPCODE(VFAE_CC);
+ OPCODE(VFAEZ_CC);
+ OPCODE(VFEE_CC);
+ OPCODE(VFEEZ_CC);
+ OPCODE(VFENE_CC);
+ OPCODE(VFENEZ_CC);
+ OPCODE(VISTR_CC);
+ OPCODE(VSTRC_CC);
+ OPCODE(VSTRCZ_CC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -2529,6 +4474,157 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#undef OPCODE
}
+// Return true if VT is a vector whose elements are a whole number of bytes
+// in width.
+static bool canTreatAsByteVector(EVT VT) {
+ return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
+}
+
+// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
+// producing a result of type ResVT. Op is a possibly bitcast version
+// of the input vector and Index is the index (based on type VecVT) that
+// should be extracted. Return the new extraction if a simplification
+// was possible or if Force is true.
+SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
+ SDValue Op, unsigned Index,
+ DAGCombinerInfo &DCI,
+ bool Force) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // The number of bytes being extracted.
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+
+ for (;;) {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::BITCAST)
+ // Look through bitcasts.
+ Op = Op.getOperand(0);
+ else if (Opcode == ISD::VECTOR_SHUFFLE &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // Get a VPERM-like permute mask and see whether the bytes covered
+ // by the extracted element are a contiguous sequence from one
+ // source operand.
+ SmallVector<int, SystemZ::VectorBytes> Bytes;
+ getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
+ int First;
+ if (!getShuffleInput(Bytes, Index * BytesPerElement,
+ BytesPerElement, First))
+ break;
+ if (First < 0)
+ return DAG.getUNDEF(ResVT);
+ // Make sure the contiguous sequence starts at a multiple of the
+ // original element size.
+ unsigned Byte = unsigned(First) % Bytes.size();
+ if (Byte % BytesPerElement != 0)
+ break;
+ // We can get the extracted value directly from an input.
+ Index = Byte / BytesPerElement;
+ Op = Op.getOperand(unsigned(First) / Bytes.size());
+ Force = true;
+ } else if (Opcode == ISD::BUILD_VECTOR &&
+ canTreatAsByteVector(Op.getValueType())) {
+ // We can only optimize this case if the BUILD_VECTOR elements are
+ // at least as wide as the extracted value.
+ EVT OpVT = Op.getValueType();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ if (OpBytesPerElement < BytesPerElement)
+ break;
+ // Make sure that the least-significant bit of the extracted value
+ // is the least significant bit of an input.
+ unsigned End = (Index + 1) * BytesPerElement;
+ if (End % OpBytesPerElement != 0)
+ break;
+ // We're extracting the low part of one operand of the BUILD_VECTOR.
+ Op = Op.getOperand(End / OpBytesPerElement - 1);
+ if (!Op.getValueType().isInteger()) {
+ EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ if (VT != ResVT) {
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
+ }
+ return Op;
+ } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
+ canTreatAsByteVector(Op.getOperand(0).getValueType())) {
+ // Make sure that only the unextended bits are significant.
+ EVT ExtVT = Op.getValueType();
+ EVT OpVT = Op.getOperand(0).getValueType();
+ unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
+ unsigned Byte = Index * BytesPerElement;
+ unsigned SubByte = Byte % ExtBytesPerElement;
+ unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
+ if (SubByte < MinSubByte ||
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
+ // Get the byte offset of the unextended element
+ Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
+ // ...then add the byte offset relative to that element.
+ Byte += SubByte - MinSubByte;
+ if (Byte % BytesPerElement != 0)
+ break;
+ Op = Op.getOperand(0);
+ Index = Byte / BytesPerElement;
+ Force = true;
+ } else
+ break;
+ }
+ if (Force) {
+ if (Op.getValueType() != VecVT) {
+ Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ }
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
+ DAG.getConstant(Index, DL, MVT::i32));
+ }
+ return SDValue();
+}
+
+// Optimize vector operations in scalar value Op on the basis that Op
+// is truncated to TruncVT.
+SDValue
+SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
+ // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
+ // of type TruncVT.
+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ TruncVT.getSizeInBits() % 8 == 0) {
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ if (canTreatAsByteVector(VecVT)) {
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
+ unsigned TruncBytes = TruncVT.getStoreSize();
+ if (BytesPerElement % TruncBytes == 0) {
+ // Calculate the value of Y' in the above description. We are
+ // splitting the original elements into Scale equal-sized pieces
+ // and for truncation purposes want the last (least-significant)
+ // of these pieces for IndexN. This is easiest to do by calculating
+ // the start index of the following element and then subtracting 1.
+ unsigned Scale = BytesPerElement / TruncBytes;
+ unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
+
+ // Defer the creation of the bitcast from X to combineExtract,
+ // which might be able to optimize the extraction.
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
+ VecVT.getStoreSize() / TruncBytes);
+ EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
+ return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2552,9 +4648,118 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
Inner.getOperand(0));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
- DAG.getConstant(NewShlAmt, ShiftVT));
+ DAG.getConstant(NewShlAmt, SDLoc(Inner),
+ ShiftVT));
return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
- DAG.getConstant(NewSraAmt, ShiftVT));
+ DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
+ }
+ }
+ }
+ }
+ if (Opcode == SystemZISD::MERGE_HIGH ||
+ Opcode == SystemZISD::MERGE_LOW) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
+ // for v4f32.
+ if (Op1 == N->getOperand(0))
+ return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ }
+ }
+ }
+ // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
+ // for the extraction to be done on a vMiN value, so that we can use VSTE.
+ // If X has wider elements then convert it to:
+ // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
+ if (Opcode == ISD::STORE) {
+ auto *SN = cast<StoreSDNode>(N);
+ EVT MemVT = SN->getMemoryVT();
+ if (MemVT.isInteger()) {
+ SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
+ SN->getValue(), DCI);
+ if (Value.getNode()) {
+ DCI.AddToWorklist(Value.getNode());
+
+ // Rewrite the store with the new form of stored value.
+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
+ SN->getBasePtr(), SN->getMemoryVT(),
+ SN->getMemOperand());
+ }
+ }
+ }
+ // Try to simplify a vector extraction.
+ if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
+ if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = Op0.getValueType();
+ return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
+ IndexN->getZExtValue(), DCI, false);
+ }
+ }
+ // (join_dwords X, X) == (replicate X)
+ if (Opcode == SystemZISD::JOIN_DWORDS &&
+ N->getOperand(0) == N->getOperand(1))
+ return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
+ // (fround (extract_vector_elt X 0))
+ // (fround (extract_vector_elt X 1)) ->
+ // (extract_vector_elt (VROUND X) 0)
+ // (extract_vector_elt (VROUND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ if (Opcode == ISD::FP_ROUND) {
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f32 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v2f64 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
+ SDValue OtherRound = SDValue(*U->use_begin(), 0);
+ if (OtherRound.getOpcode() == ISD::FP_ROUND &&
+ OtherRound.getOperand(0) == SDValue(U, 0) &&
+ OtherRound.getValueType() == MVT::f32) {
+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
+ DCI.AddToWorklist(VRound.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
+ VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
+ VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
}
}
}
@@ -2614,8 +4819,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getSubtarget().getInstrInfo());
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2663,8 +4868,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
- const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getSubtarget().getInstrInfo());
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2733,7 +4938,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2853,7 +5058,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2965,7 +5170,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3082,7 +5287,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
bool ClearEven, unsigned SubReg) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3114,7 +5319,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3284,7 +5489,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3338,6 +5543,57 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
return DoneMBB;
}
+// Update TBEGIN instruction with final opcode and register clobbers.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm();
+ static const unsigned GPRControlBit[16] = {
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
+ Control |= GPRControlBit[15];
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11];
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) {
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR/VR clobbers.
+ if (!NoFloat && (Control & 4) != 0) {
+ if (Subtarget.hasVector()) {
+ for (int I = 0; I < 32; I++) {
+ unsigned Reg = SystemZMC::VR128Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ } else {
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3579,6 +5835,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 887c236f1e78f..b001abc693d6f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -22,7 +22,7 @@
namespace llvm {
namespace SystemZISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
// Return with a flag operand. Operand 0 is the chain operand.
@@ -34,6 +34,11 @@ enum {
CALL,
SIBCALL,
+ // TLS calls. Like regular calls, except operand 1 is the TLS symbol.
+ // (The call target is implicitly __tls_get_offset.)
+ TLS_GDCALL,
+ TLS_LDCALL,
+
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
@@ -82,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -138,6 +146,135 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
+ // Create a vector constant by filling byte N of the result with bit
+ // 15-N of the single operand.
+ BYTE_MASK,
+
+ // Create a vector constant by replicating an element-sized RISBG-style mask.
+ // The first operand specifies the starting set bit and the second operand
+ // specifies the ending set bit. Both operands count from the MSB of the
+ // element.
+ ROTATE_MASK,
+
+ // Replicate a GPR scalar value into all elements of a vector.
+ REPLICATE,
+
+ // Create a vector from two i64 GPRs.
+ JOIN_DWORDS,
+
+ // Replicate one element of a vector into all elements. The first operand
+ // is the vector and the second is the index of the element to replicate.
+ SPLAT,
+
+ // Interleave elements from the high half of operand 0 and the high half
+ // of operand 1.
+ MERGE_HIGH,
+
+ // Likewise for the low halves.
+ MERGE_LOW,
+
+ // Concatenate the vectors in the first two operands, shift them left
+ // by the third operand, and take the first half of the result.
+ SHL_DOUBLE,
+
+ // Take one element of the first v2i64 operand and the one element of
+ // the second v2i64 operand and concatenate them to form a v2i64 result.
+ // The third operand is a 4-bit value of the form 0A0B, where A and B
+ // are the element selectors for the first operand and second operands
+ // respectively.
+ PERMUTE_DWORDS,
+
+ // Perform a general vector permute on vector operands 0 and 1.
+ // Each byte of operand 2 controls the corresponding byte of the result,
+ // in the same way as a byte-level VECTOR_SHUFFLE mask.
+ PERMUTE,
+
+ // Pack vector operands 0 and 1 into a single vector with half-sized elements.
+ PACK,
+
+ // Likewise, but saturate the result and set CC. PACKS_CC does signed
+ // saturation and PACKLS_CC does unsigned saturation.
+ PACKS_CC,
+ PACKLS_CC,
+
+ // Unpack the first half of vector operand 0 into double-sized elements.
+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
+ UNPACK_HIGH,
+ UNPACKL_HIGH,
+
+ // Likewise for the second half.
+ UNPACK_LOW,
+ UNPACKL_LOW,
+
+ // Shift each element of vector operand 0 by the number of bits specified
+ // by scalar operand 1.
+ VSHL_BY_SCALAR,
+ VSRL_BY_SCALAR,
+ VSRA_BY_SCALAR,
+
+ // For each element of the output type, sum across all sub-elements of
+ // operand 0 belonging to the corresponding element, and add in the
+ // rightmost sub-element of the corresponding element of operand 1.
+ VSUM,
+
+ // Compare integer vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VICMPE is for equality, VICMPH for "signed greater than"
+ // and VICMPHL for "unsigned greater than".
+ VICMPE,
+ VICMPH,
+ VICMPHL,
+
+ // Likewise, but also set the condition codes on the result.
+ VICMPES,
+ VICMPHS,
+ VICMPHLS,
+
+ // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1
+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
+ // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE,
+ VFCMPH,
+ VFCMPHE,
+
+ // Likewise, but also set the condition codes on the result.
+ VFCMPES,
+ VFCMPHS,
+ VFCMPHES,
+
+ // Test floating-point data class for vectors.
+ VFTCI,
+
+ // Extend the even f32 elements of vector operand 0 to produce a vector
+ // of f64 elements.
+ VEXTEND,
+
+ // Round the f64 elements of vector operand 0 to f32s and store them in the
+ // even elements of the result.
+ VROUND,
+
+ // AND the two vector operands together and set CC based on the result.
+ VTM,
+
+ // String operations that set CC as a side-effect.
+ VFAE_CC,
+ VFAEZ_CC,
+ VFEE_CC,
+ VFEEZ_CC,
+ VFENE_CC,
+ VFENEZ_CC,
+ VISTR_CC,
+ VSTRC_CC,
+ VSTRCZ_CC,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -198,15 +335,40 @@ class SystemZTargetMachine;
class SystemZTargetLowering : public TargetLowering {
public:
- explicit SystemZTargetLowering(const TargetMachine &TM);
+ explicit SystemZTargetLowering(const TargetMachine &TM,
+ const SystemZSubtarget &STI);
// Override TargetLowering.
MVT getScalarShiftAmountTy(EVT LHSTy) const override {
return MVT::i32;
}
+ MVT getVectorIdxTy() const override {
+ // Only the lower 12 bits of an element index are used, so we don't
+ // want to clobber the upper 32 bits of a GPR unnecessarily.
+ return MVT::i32;
+ }
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ const override {
+ // Widen subvectors to the full width rather than promoting integer
+ // elements. This is better because:
+ //
+ // (a) it means that we can handle the ABI for passing and returning
+ // sub-128 vectors without having to handle them as legal types.
+ //
+ // (b) we don't have instructions to extend on load and truncate on store,
+ // so promoting the integers is less efficient.
+ //
+ // (c) there are no multiplication instructions for the widest integer
+ // type (v2i64).
+ if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
+ return TypeWidenVector;
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -215,8 +377,9 @@ public:
bool isTruncateFree(EVT, EVT) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const override;
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
+ MVT VT) const override;
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const override;
TargetLowering::ConstraintWeight
@@ -226,6 +389,26 @@ public:
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode.size() == 1) {
+ switch(ConstraintCode[0]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ case 'R':
+ return InlineAsm::Constraint_R;
+ case 'S':
+ return InlineAsm::Constraint_S;
+ case 'T':
+ return InlineAsm::Constraint_T;
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
@@ -257,6 +440,9 @@ private:
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
+ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG, unsigned Opcode,
+ SDValue GOTOffset) const;
SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SelectionDAG &DAG) const;
SDValue lowerBlockAddress(BlockAddressSDNode *Node,
@@ -272,6 +458,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -282,6 +469,22 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const;
+ SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+
+ SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
+ unsigned Index, DAGCombinerInfo &DCI,
+ bool Force) const;
+ SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
+ DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -319,6 +522,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 4a5582fbf4e25..27fbd7df2882e 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
-defm : CompareZeroFP<LTEBRCompare, FP32>;
-defm : CompareZeroFP<LTDBRCompare, FP64>;
-defm : CompareZeroFP<LTXBRCompare, FP128>;
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
+let Predicates = [FeatureNoVector] in {
+ defm : CompareZeroFP<LTEBRCompare, FP32>;
+ defm : CompareZeroFP<LTDBRCompare, FP64>;
+ defm : CompareZeroFP<LTXBRCompare, FP128>;
+}
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+
// These instructions are split after register allocation, so we don't
// want a custom inserter.
let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
@@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
Requires<[FeatureFPExtension]>;
def : Pat<(f32 (fround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
def : Pat<(f64 (fround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
@@ -345,13 +353,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_h32), FP32:$src2)>;
+ FP32:$src1, subreg_r32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c8e7e3c..71eb9986499b6 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping {
// Formats are specified using operand field declarations of the form:
//
// bits<4> Rn : register input or output for operand n
+// bits<5> Vn : vector register input or output for operand n
// bits<m> In : immediate value of width m for operand n
// bits<4> BDn : address operand n, which has a base and a displacement
// bits<m> XBDn : address operand n, which has an index, a base and a
// displacement
+// bits<m> VBDn : address operand n, which has a vector index, a base and a
+// displacement
// bits<4> Xn : index register for address operand n
// bits<4> Mn : mode value for operand n
//
@@ -339,11 +342,13 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<20> XBD2;
+ bits<4> M3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-16} = XBD2;
- let Inst{15-8} = 0;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
let HasIndex = 1;
@@ -473,6 +478,393 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op;
+ let Inst{15-0} = BD2;
+}
+
+class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> I2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<8> I2;
+ bits<8> I3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = 0;
+ let Inst{31-24} = I2;
+ let Inst{23-16} = I3;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V3;
+ bits<16> I2;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = I2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<8> I4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-16} = I4;
+ let Inst{15-12} = M5;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<12> I3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-20} = I3;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M4 value provided as explicit operand. These are passed as m4or.
+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m4or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<4> M3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M5;
+ let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3});
+ let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2});
+ let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1});
+ let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0});
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M5 value provided as explicit operand. These are passed as m5or.
+class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m5or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3});
+ let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2});
+ let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1});
+ let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<4> M4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-20} = M6;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+// Depending on the instruction mnemonic, certain bits may be or-ed into
+// the M6 value provided as explicit operand. These are passed as m6or.
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ bits<4> m6or = 0>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M5;
+ let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3});
+ let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2});
+ let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1});
+ let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0});
+ let Inst{19-16} = 0;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<4> M5;
+ bits<4> M6;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = M6;
+ let Inst{23-20} = 0;
+ let Inst{19-16} = M5;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<4> R2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R2;
+ let Inst{31-28} = R3;
+ let Inst{27-12} = 0;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<16> BD2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> BD2;
+ bits<5> V3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = V3{3-0};
+ let Inst{31-16} = BD2;
+ let Inst{15-12} = M4;
+ let Inst{11} = 0;
+ let Inst{10} = V3{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<21> VBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = VBD2{19-0};
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10} = VBD2{20};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<20> XBD2;
+ bits<4> M3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-16} = XBD2;
+ let Inst{15-12} = M3;
+ let Inst{11} = V1{4};
+ let Inst{10-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
@@ -492,12 +884,6 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// form of the source register in the destination register and
// branches on the result.
//
-// Store:
-// One register or immediate input operand and one address input operand.
-// The instruction stores the first operand to the address.
-//
-// This category is used for both pure and truncating stores.
-//
// LoadMultiple:
// One address input operand and two explicit output operands.
// The instruction loads a range of registers from the address,
@@ -510,18 +896,35 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// with the explicit operands giving the first and last register
// to store. Other stored registers are added as implicit uses.
//
+// StoreLength:
+// One value operand, one length operand and one address operand.
+// The instruction stores the value operand to the address but
+// doesn't write more than the number of bytes specified by the
+// length operand.
+//
// Unary:
// One register output operand and one input operand.
//
+// Store:
+// One address operand and one other input operand. The instruction
+// stores to the address.
+//
// Binary:
// One register output operand and two input operands.
//
+// StoreBinary:
+// One address operand and two other input operands. The instruction
+// stores to the address.
+//
// Compare:
// Two input operands and an implicit CC output operand.
//
// Ternary:
// One register output operand and three input operands.
//
+// Quaternary:
+// One register output operand and four input operands.
+//
// LoadAndOp:
// One output operand and two input operands, one of which is an address.
// The instruction both reads from and writes to the address.
@@ -556,6 +959,12 @@ class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
let R2 = 0;
}
+class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
+ : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> {
+ let I2 = value;
+ let M3 = 0;
+}
+
class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
: InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2),
mnemonic##"\t$R1, $I2", []> {
@@ -571,6 +980,13 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
let mayLoad = 1;
}
+class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayLoad = 1;
+}
+
class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
: InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
@@ -619,12 +1035,39 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
}
+class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
+ let M3 = type;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreLengthVRSb<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
+ let M4 = 0;
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
+class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []> {
+ let M4 = 0;
+ let mayStore = 1;
+}
+
// StoreSI* instructions are used to store an integer to memory, but the
// addresses are more restricted than for normal stores. If we are in the
// situation of having to force either the address into a register or the
@@ -857,6 +1300,7 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let OpType = "mem";
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -883,6 +1327,46 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
}
+class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, Immediate imm, bits<4> type = 0>
+ : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
+ mnemonic#"\t$V1, $I2",
+ [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> {
+ let M3 = type;
+}
+
+class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
+ bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> {
+ let M3 = type;
+ let M4 = m4;
+ let M5 = m5;
+}
+
+multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> modifier = 0,
+ bits<4> modifier_cc = 1> {
+ def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>;
+ let Defs = [CC] in
+ def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0,
+ modifier_cc>;
+}
+
+class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes, bits<4> type = 0>
+ : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
+ let M3 = type;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
@@ -1036,6 +1520,7 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let DisableEncoding = "$R1src";
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1094,6 +1579,148 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
}
+class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
+ mnemonic#"\t$V1, $I2, $I3",
+ [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
+ mnemonic#"\t$V1, $V3, $I2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ imm32zx16:$I2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5>
+ : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
+ mnemonic#"\t$V1, $V2, $I3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx12:$I3)))]> {
+ let M4 = type;
+ let M5 = m5;
+}
+
+class BinaryVRRa<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let M4 = 0;
+ let M5 = 0;
+}
+
+class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0,
+ bits<4> modifier = 0>
+ : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = modifier;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0, bits<4> modifier_cc = 1> {
+ def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>;
+ let Defs = [CC] in
+ def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ modifier_cc>;
+}
+
+class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
+ bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
+ mnemonic#"\t$V1, $V2, $V3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3))))]> {
+ let M4 = type;
+ let M5 = m5;
+ let M6 = m6;
+}
+
+multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m5,
+ bits<4> modifier = 0, bits<4> modifier_cc = 1> {
+ def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>;
+ let Defs = [CC] in
+ def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ m5, modifier_cc>;
+}
+
+class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr>
+ : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3),
+ mnemonic#"\t$V1, $R2, $R3",
+ [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>;
+
+class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
+ shift12only:$BD2)))]> {
+ let M4 = type;
+}
+
+class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ bits<5> bytes>
+ : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
+ let M4 = 0;
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2),
+ mnemonic#"\t$R1, $V3, $BD2",
+ [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
+ let M4 = type;
+}
+
+class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<5> bytes>
+ : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2,
+ imm32zx4:$M3)))]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class StoreBinaryVRX<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr, bits<5> bytes,
+ Immediate index>
+ : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
@@ -1166,6 +1793,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let isCompare = 1;
let mayLoad = 1;
let AccessBytes = bytes;
+ let M3 = 0;
}
class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1235,6 +1863,17 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
}
+class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr, bits<4> type>
+ : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
+ mnemonic#"\t$V1, $V2",
+ [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> {
+ let isCompare = 1;
+ let M3 = type;
+ let M4 = 0;
+ let M5 = 0;
+}
+
class TernaryRRD<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls>
: InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
@@ -1261,6 +1900,188 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let AccessBytes = bytes;
}
+class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
+ : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
+ mnemonic#"\t$V1, $I2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ imm:$I2, index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
+class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let M5 = type;
+}
+
+class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
+ : InstVRRa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M4, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx4:$M4,
+ imm32zx4:$M5)))],
+ m4or> {
+ let M3 = type;
+}
+
+class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ SDPatternOperator m5mask, bits<4> m5or>
+ : InstVRRb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ m5mask:$M5)))],
+ m5or> {
+ let M4 = type;
+}
+
+multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m5or> {
+ def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (m5or, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+ let Defs = [CC] in
+ def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add(!and (m5or, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+}
+
+class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2>
+ : InstVRRc<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $V3, $M4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx4:$M4)))]> {
+ let M5 = 0;
+ let M6 = 0;
+}
+
+class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = type;
+ let M6 = 0;
+}
+
+class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
+ : InstVRRe<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
+ mnemonic#"\t$V1, $V2, $V3, $V4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4))))]> {
+ let M5 = m5;
+ let M6 = type;
+}
+
+class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
+ : InstVRSb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2),
+ mnemonic#"\t$V1, $R3, $BD2",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ cls:$R3,
+ shift12only:$BD2)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M4 = type;
+}
+
+class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
+ Immediate index>
+ : InstVRV<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
+ mnemonic#"\t$V1, $VBD2, $M3", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
+ : InstVRX<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ bdxaddr12only:$XBD2,
+ index:$M3)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+}
+
+class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type>
+ : InstVRId<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
+ mnemonic#"\t$V1, $V2, $V3, $I4",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
+ (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx8:$I4)))]> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+ let M5 = type;
+}
+
+class QuaternaryVRRd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
+ bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
+ : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ (tr2.vt tr2.op:$V4),
+ m6mask:$M6)))],
+ m6or> {
+ let M5 = type;
+}
+
+multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc, TypedReg tr1,
+ TypedReg tr2, bits<4> type, bits<4> m6or> {
+ def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
+ imm32zx4even, !and (m6or, 14)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+ let Defs = [CC] in
+ def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
+ imm32zx4even, !add (!and (m6or, 14), 1)>;
+ def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr2.op:$V4, 0)>;
+}
+
class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
: InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
@@ -1330,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
// A floating-point load-and test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
+// Note that the comparison against zero operation is not available if we
+// have vector support, since load-and-test instructions will partially
+// clobber the target (vector) register.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
RegisterOperand cls> {
def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
- let isCodeGenOnly = 1 in
+ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
}
@@ -1577,6 +2401,26 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
+
+// An alias of a UnaryVRR*, but with different register sizes.
+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
+ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
+
+// An alias of a UnaryVRX, but with different register sizes.
+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
+
+// An alias of a StoreVRX, but with different register sizes.
+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
+ AddressingMode mode = bdxaddr12only>
+ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
+
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
@@ -1593,6 +2437,10 @@ class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
let Constraints = "$R1 = $R1src";
}
+// An alias of a BinaryVRRf, but with different register sizes.
+class BinaryAliasVRRf<RegisterOperand cls>
+ : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
+
// An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 8ff9553ca0817..90598852b5ed0 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -423,7 +423,7 @@ static MachineInstr *getDef(unsigned Reg,
}
// Return true if MI is a shift of type Opcode by Imm bits.
-static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
return (MI->getOpcode() == Opcode &&
!MI->getOperand(2).getReg() &&
MI->getOperand(3).getImm() == Imm);
@@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
+ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR32;
+ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR64;
+ else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::VLR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@@ -633,7 +639,7 @@ struct LogicOp {
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
- LLVM_EXPLICIT operator bool() const { return RegSize; }
+ explicit operator bool() const { return RegSize; }
unsigned RegSize, ImmLSB, ImmSize;
};
@@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Start, End;
if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
unsigned NewOpcode;
- if (And.RegSize == 64)
+ if (And.RegSize == 64) {
NewOpcode = SystemZ::RISBG;
- else {
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (STI.hasMiscellaneousExtensions())
+ NewOpcode = SystemZ::RISBGN;
+ } else {
NewOpcode = SystemZ::RISBMux;
Start &= 31;
End &= 31;
@@ -743,11 +752,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
}
-MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Opcode = MI->getOpcode();
@@ -862,9 +870,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
return nullptr;
}
@@ -1114,6 +1122,16 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
+ } else if (RC == &SystemZ::VR32BitRegClass) {
+ LoadOpcode = SystemZ::VL32;
+ StoreOpcode = SystemZ::VST32;
+ } else if (RC == &SystemZ::VR64BitRegClass) {
+ LoadOpcode = SystemZ::VL64;
+ StoreOpcode = SystemZ::VST64;
+ } else if (RC == &SystemZ::VF128BitRegClass ||
+ RC == &SystemZ::VR128BitRegClass) {
+ LoadOpcode = SystemZ::VL;
+ StoreOpcode = SystemZ::VST;
} else
llvm_unreachable("Unsupported regclass to load or store");
}
@@ -1147,17 +1165,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
- case SystemZ::L: return SystemZ::LT;
- case SystemZ::LY: return SystemZ::LT;
- case SystemZ::LG: return SystemZ::LTG;
- case SystemZ::LGF: return SystemZ::LTGF;
- case SystemZ::LR: return SystemZ::LTR;
- case SystemZ::LGFR: return SystemZ::LTGFR;
- case SystemZ::LGR: return SystemZ::LTGR;
- case SystemZ::LER: return SystemZ::LTEBR;
- case SystemZ::LDR: return SystemZ::LTDBR;
- case SystemZ::LXR: return SystemZ::LTXBR;
- default: return 0;
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
+ // On zEC12 we prefer to use RISBGN. But if there is a chance to
+ // actually use the condition code, we may turn it back into RISGB.
+ // Note that RISBG is not really a "load-and-test" instruction,
+ // but sets the same condition code values, so is OK to use here.
+ case SystemZ::RISBGN: return SystemZ::RISBG;
+ default: return 0;
}
}
@@ -1178,6 +1201,7 @@ static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) {
bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
unsigned &Start, unsigned &End) const {
// Reject trivial all-zero masks.
+ Mask &= allOnes(BitSize);
if (Mask == 0)
return false;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index d2e3f541f80ee..b55810b253f15 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
// SystemZ MachineOperand target flags.
enum {
// Masks out the bits for the access model.
- MO_SYMBOL_MODIFIER = (1 << 0),
+ MO_SYMBOL_MODIFIER = (3 << 0),
// @GOT (aka @GOTENT)
- MO_GOT = (1 << 0)
+ MO_GOT = (1 << 0),
+
+ // @INDNTPOFF
+ MO_INDNTPOFF = (2 << 0)
};
// Classifies a branch.
enum BranchType {
@@ -183,11 +186,11 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 902d74de506bf..820f30bc173d2 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
+// TLS calls. These will be lowered into a call to __tls_get_offset,
+// with an extra relocation specifying the TLS symbol.
+let isCall = 1, Defs = [R14D, CC] in {
+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
+}
+
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
-def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
"bras\t$R1, $I2", []>;
-def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
"brasl\t$R1, $I2", []>;
def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
"basr\t$R1, $R2", []>;
@@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
[(set GR64:$R1, pcrel32:$I2)]>;
}
+// Load the Global Offset Table address. This will be lowered into a
+// larl $R1, _GLOBAL_OFFSET_TABLE_
+// instruction.
+def GOT : Alias<6, (outs GR64:$R1), (ins),
+ [(set GR64:$R1, (global_offset_table))]>;
+
//===----------------------------------------------------------------------===//
// Absolute and Negation
//===----------------------------------------------------------------------===//
@@ -1045,6 +1061,10 @@ let Defs = [CC] in {
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
let Predicates = [FeatureHighWord] in {
@@ -1342,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
@@ -1366,6 +1440,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
new file mode 100644
index 0000000000000..c101e43ada3a4
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -0,0 +1,1097 @@
+//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Register move.
+ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
+ def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
+ def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
+
+ // Load GR from VR element.
+ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
+ def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
+ def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
+ def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
+
+ // Load VR element from GR.
+ def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
+ v128b, v128b, GR32, 0>;
+ def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
+ v128h, v128h, GR32, 1>;
+ def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert,
+ v128f, v128f, GR32, 2>;
+ def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert,
+ v128g, v128g, GR64, 3>;
+
+ // Load VR from GRs disjoint.
+ def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>;
+ def VLVGP32 : BinaryAliasVRRf<GR32>;
+}
+
+// Extractions always assign to the full GR64, even if the element would
+// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a
+// subreg of the result.
+class VectorExtractSubreg<ValueType type, Instruction insn>
+ : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)),
+ (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>;
+
+def : VectorExtractSubreg<v16i8, VLGVB>;
+def : VectorExtractSubreg<v8i16, VLGVH>;
+def : VectorExtractSubreg<v4i32, VLGVF>;
+
+//===----------------------------------------------------------------------===//
+// Immediate instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Load element immediate.
+ //
+ // We want these instructions to be used ahead of VLVG* where possible.
+ // However, VLVG* takes a variable BD-format index whereas VLEI takes
+ // a plain immediate index. This means that VLVG* has an extra "base"
+ // register operand and is 3 units more complex. Bumping the complexity
+ // of the VLEI* instructions by 4 means that they are strictly better
+ // than VLVG* in cases where both forms match.
+ let AddedComplexity = 4 in {
+ def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert,
+ v128b, v128b, imm32sx16trunc, imm32zx4>;
+ def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert,
+ v128h, v128h, imm32sx16trunc, imm32zx3>;
+ def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert,
+ v128f, v128f, imm32sx16, imm32zx2>;
+ def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
+ v128g, v128g, imm64sx16, imm32zx1>;
+ }
+
+ // Replicate immediate.
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+}
+
+//===----------------------------------------------------------------------===//
+// Loads
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Load.
+ def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
+
+ // Load to block boundary. The number of loaded bytes is only known
+ // at run time. The instruction is really polymorphic, but v128b matches
+ // the return type of the associated intrinsic.
+ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>;
+
+ // Load count to block boundary.
+ let Defs = [CC] in
+ def LCBB : InstRXE<0xE727, (outs GR32:$R1),
+ (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ "lcbb\t$R1, $XBD2, $M3",
+ [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
+ imm32zx4:$M3))]>;
+
+ // Load with length. The number of loaded bytes is only known at run time.
+ def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
+
+ // Load multiple.
+ def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
+
+ // Load and replicate
+ def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>;
+ def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
+ def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
+ def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)),
+ (VLREPF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
+ (VLREPG bdxaddr12only:$addr)>;
+
+ // Use VLREP to load subvectors. These patterns use "12pair" because
+ // LEY and LDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
+ def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
+
+ // Load logical element and zero.
+ def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
+ def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
+ def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
+ def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
+ def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
+ (VLLEZF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ (VLLEZG bdxaddr12only:$addr)>;
+
+ // Load element.
+ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
+ def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
+ def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
+ def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
+ def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index),
+ (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
+ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Gather element.
+ def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
+ def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
+}
+
+// Use replicating loads if we're inserting a single element into an
+// undefined vector. This avoids a false dependency on the previous
+// register contents.
+multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
+ SDPatternOperator load, ValueType scalartype> {
+ def : Pat<(vectype (z_vector_insert
+ (undef), (scalartype (load bdxaddr12only:$addr)), 0)),
+ (vlrep bdxaddr12only:$addr)>;
+ def : Pat<(vectype (scalar_to_vector
+ (scalartype (load bdxaddr12only:$addr)))),
+ (vlrep bdxaddr12only:$addr)>;
+}
+defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
+defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
+defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
+defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
+defm : ReplicatePeephole<VLREPF, v4f32, load, f32>;
+defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
+
+//===----------------------------------------------------------------------===//
+// Stores
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Store.
+ def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
+
+ // Store with length. The number of stored bytes is only known at run time.
+ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
+
+ // Store multiple.
+ def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
+
+ // Store element.
+ def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
+ def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
+ def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
+ def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
+ def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr,
+ imm32zx2:$index),
+ (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
+ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
+ imm32zx1:$index),
+ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
+
+ // Use VSTE to store subvectors. These patterns use "12pair" because
+ // STEY and STDY offer full 20-bit displacement fields. It's often better
+ // to use those instructions rather than force a 20-bit displacement
+ // into a GPR temporary.
+ def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
+ def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
+
+ // Scatter element.
+ def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
+ def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
+}
+
+//===----------------------------------------------------------------------===//
+// Selects and permutes
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Merge high.
+ def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
+ def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
+ def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
+ def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>;
+ def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
+
+ // Merge low.
+ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
+ def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
+ def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
+ def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>;
+ def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
+
+ // Permute.
+ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
+
+ // Permute doubleword immediate.
+ def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
+
+ // Replicate.
+ def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
+ def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
+ def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
+ def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
+ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPF VR128:$vec, imm32zx16:$index)>;
+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPG VR128:$vec, imm32zx16:$index)>;
+
+ // Select.
+ def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
+}
+
+//===----------------------------------------------------------------------===//
+// Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Pack
+ def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
+ def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
+ def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
+
+ // Pack saturate.
+ defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
+ v128b, v128h, 1>;
+ defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
+ v128h, v128f, 2>;
+ defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc,
+ v128f, v128g, 3>;
+
+ // Pack saturate logical.
+ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
+ v128b, v128h, 1>;
+ defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
+ v128h, v128f, 2>;
+ defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc,
+ v128f, v128g, 3>;
+
+ // Sign-extend to doubleword.
+ def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>;
+ def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
+ def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
+ def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>;
+ def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
+ def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
+
+ // Unpack high.
+ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
+ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
+ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
+
+ // Unpack logical high.
+ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
+ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
+ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
+
+ // Unpack low.
+ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
+ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
+ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
+
+ // Unpack logical low.
+ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
+ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
+ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instantiating generic operations for specific types.
+//===----------------------------------------------------------------------===//
+
+multiclass GenericVectorOps<ValueType type, ValueType inttype> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (load bdxaddr12only:$addr)),
+ (VL bdxaddr12only:$addr)>;
+ def : Pat<(store (type VR128:$src), bdxaddr12only:$addr),
+ (VST VR128:$src, bdxaddr12only:$addr)>;
+ def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)),
+ (VSEL VR128:$y, VR128:$z, VR128:$x)>;
+ def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)),
+ (VSEL VR128:$z, VR128:$y, VR128:$x)>;
+ }
+}
+
+defm : GenericVectorOps<v16i8, v16i8>;
+defm : GenericVectorOps<v8i16, v8i16>;
+defm : GenericVectorOps<v4i32, v4i32>;
+defm : GenericVectorOps<v2i64, v2i64>;
+defm : GenericVectorOps<v4f32, v4i32>;
+defm : GenericVectorOps<v2f64, v2i64>;
+
+//===----------------------------------------------------------------------===//
+// Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
+
+ // Add compute carry.
+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
+
+ // Add with carry.
+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
+
+ // Add with carry compute carry.
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
+
+ // And.
+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
+
+ // And with complement.
+ def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
+
+ // Average.
+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+
+ // Average logical.
+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+
+ // Checksum.
+ def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
+
+ // Count leading zeros.
+ def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
+ def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
+ def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
+ def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
+
+ // Count trailing zeros.
+ def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
+ def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
+ def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
+ def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
+
+ // Exclusive or.
+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
+
+ // Galois field multiply sum.
+ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
+ def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
+ def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
+ def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
+
+ // Galois field multiply sum and accumulate.
+ def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
+ def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
+ def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
+ def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
+
+ // Load complement.
+ def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
+ def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
+ def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
+ def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
+
+ // Load positive.
+ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
+ def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
+ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
+ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
+
+ // Maximum.
+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+
+ // Maximum logical.
+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+
+ // Minimum.
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+
+ // Minimum logical.
+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+
+ // Multiply and add low.
+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+
+ // Multiply and add high.
+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+
+ // Multiply and add logical high.
+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+
+ // Multiply and add even.
+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+
+ // Multiply and add logical even.
+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+
+ // Multiply and add odd.
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+
+ // Multiply and add logical odd.
+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+
+ // Multiply high.
+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+
+ // Multiply logical high.
+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+
+ // Multiply low.
+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+
+ // Multiply even.
+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+
+ // Multiply logical even.
+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+
+ // Multiply odd.
+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+
+ // Multiply logical odd.
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+
+ // Nor.
+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+
+ // Or.
+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
+
+ // Population count.
+ def VPOPCT : BinaryVRRa<"vpopct", 0xE750>;
+ def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
+
+ // Element rotate left logical (with vector shift amount).
+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
+ v128b, v128b, 0>;
+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
+ v128h, v128h, 1>;
+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
+ v128f, v128f, 2>;
+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
+ v128g, v128g, 3>;
+
+ // Element rotate left logical (with scalar shift amount).
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
+ def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
+ def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
+ def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+
+ // Element rotate and insert under mask.
+ def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>;
+ def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>;
+ def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>;
+ def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>;
+
+ // Element shift left (with vector shift amount).
+ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
+ def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
+ def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
+ def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
+
+ // Element shift left (with scalar shift amount).
+ def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
+ def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
+ def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
+ def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with vector shift amount).
+ def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
+ def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
+ def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
+ def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
+
+ // Element shift right arithmetic (with scalar shift amount).
+ def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
+ def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
+ def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
+ def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
+
+ // Element shift right logical (with vector shift amount).
+ def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
+ def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
+ def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
+ def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
+
+ // Element shift right logical (with scalar shift amount).
+ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
+ def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
+ def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
+ def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
+
+ // Shift left.
+ def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>;
+
+ // Shift left by byte.
+ def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>;
+
+ // Shift left double by byte.
+ def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
+ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
+ (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
+
+ // Shift right arithmetic.
+ def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
+
+ // Shift right arithmetic by byte.
+ def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>;
+
+ // Shift right logical.
+ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>;
+
+ // Shift right logical by byte.
+ def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
+
+ // Subtract.
+ def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
+ def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
+ def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
+ def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
+ def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
+
+ // Subtract compute borrow indication.
+ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
+ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
+ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
+ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
+ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow indication.
+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
+
+ // Subtract with borrow compute borrow indication.
+ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
+ v128q, v128q, 4>;
+
+ // Sum across doubleword.
+ def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
+ def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
+
+ // Sum across quadword.
+ def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
+ def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
+
+ // Sum across word.
+ def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
+ def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
+}
+
+// Instantiate the bitwise ops for type TYPE.
+multiclass BitwiseVectorOps<ValueType type> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
+ def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
+ (VNC VR128:$x, VR128:$y)>;
+ def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
+ def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$z),
+ (and VR128:$y, (z_vnot VR128:$z)))),
+ (VSEL VR128:$x, VR128:$y, VR128:$z)>;
+ def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
+ (VNO VR128:$x, VR128:$y)>;
+ def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
+ }
+}
+
+defm : BitwiseVectorOps<v16i8>;
+defm : BitwiseVectorOps<v8i16>;
+defm : BitwiseVectorOps<v4i32>;
+defm : BitwiseVectorOps<v2i64>;
+
+// Instantiate additional patterns for absolute-related expressions on
+// type TYPE. LC is the negate instruction for TYPE and LP is the absolute
+// instruction.
+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc,
+ Instruction lp, int shift> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)),
+ VR128:$x, (z_vneg VR128:$x))),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))),
+ (z_vneg VR128:$x), VR128:$x)),
+ (lc (lp VR128:$x))>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ (z_vneg VR128:$x)),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ VR128:$x))),
+ (lp VR128:$x)>;
+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
+ VR128:$x),
+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
+ (z_vneg VR128:$x)))),
+ (lc (lp VR128:$x))>;
+ }
+}
+
+defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>;
+defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
+defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
+defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
+
+// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the
+// signed or unsigned "set if greater than" comparison instruction and
+// MIN and MAX are the associated minimum and maximum instructions.
+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
+ Instruction min, Instruction max> {
+ let Predicates = [FeatureVector] in {
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
+ (max VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$x, VR128:$y)),
+ (min VR128:$x, VR128:$y)>;
+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
+ VR128:$y, VR128:$x)),
+ (max VR128:$x, VR128:$y)>;
+ }
+}
+
+// Signed min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
+
+// Unsigned min/max.
+defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
+defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
+defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
+defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
+
+//===----------------------------------------------------------------------===//
+// Integer comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Element compare.
+ let Defs = [CC] in {
+ def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>;
+ def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
+ def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
+ def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>;
+ }
+
+ // Element compare logical.
+ let Defs = [CC] in {
+ def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>;
+ def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
+ def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
+ def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>;
+ }
+
+ // Compare equal.
+ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128b, v128b, 0>;
+ defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128h, v128h, 1>;
+ defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128f, v128f, 2>;
+ defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128g, v128g, 3>;
+
+ // Compare high.
+ defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
+ v128b, v128b, 0>;
+ defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
+ v128h, v128h, 1>;
+ defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs,
+ v128f, v128f, 2>;
+ defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
+ v128g, v128g, 3>;
+
+ // Compare high logical.
+ defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128b, v128b, 0>;
+ defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128h, v128h, 1>;
+ defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128f, v128f, 2>;
+ defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128g, v128g, 3>;
+
+ // Test under mask.
+ let Defs = [CC] in
+ def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// See comments in SystemZInstrFP.td for the suppression flags and
+// rounding modes.
+multiclass VectorRounding<Instruction insn, TypedReg tr> {
+ def : FPConversion<insn, frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, frnd, tr, tr, 4, 1>;
+}
+
+let Predicates = [FeatureVector] in {
+ // Add.
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+
+ // Convert from fixed 64-bit.
+ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert from logical 64-bit.
+ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+
+ // Convert to fixed 64-bit.
+ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+
+ // Convert to logical 64-bit.
+ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+
+ // Divide.
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+
+ // Load FP integer.
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ defm : VectorRounding<VFIDB, v128db>;
+ defm : VectorRounding<WFIDB, v64db>;
+
+ // Load lengthened.
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
+
+ // Load rounded,
+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
+ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
+
+ // Multiply.
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+
+ // Multiply and add.
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+
+ // Multiply and subtract.
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
+
+ // Load complement,
+ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
+
+ // Load negative.
+ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
+
+ // Load positive.
+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
+
+ // Square root.
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
+
+ // Subtract.
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
+
+ // Test data class immediate.
+ let Defs = [CC] in {
+ def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
+ def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ // Compare scalar.
+ let Defs = [CC] in
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+
+ // Compare and signal scalar.
+ let Defs = [CC] in
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+
+ // Compare equal.
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128g, v128db, 3, 0>;
+ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high.
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128g, v128db, 3, 0>;
+ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+
+ // Compare high or equal.
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128g, v128db, 3, 0>;
+ defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversions
+//===----------------------------------------------------------------------===//
+
+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Replicating scalars
+//===----------------------------------------------------------------------===//
+
+// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
+// INDEX is 8 minus the element size in bytes.
+class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
+ : Pat<(type (z_replicate GR32:$scalar)),
+ (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
+
+def : VectorReplicateScalar<v16i8, VREPB, 7>;
+def : VectorReplicateScalar<v8i16, VREPH, 3>;
+def : VectorReplicateScalar<v4i32, VREPF, 1>;
+
+// i64 replications are just a single isntruction.
+def : Pat<(v2i64 (z_replicate GR64:$scalar)),
+ (VLVGP GR64:$scalar, GR64:$scalar)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+// Moving 32-bit values between GPRs and FPRs can be done using VLVGF
+// and VLGVF.
+def LEFR : UnaryAliasVRS<VR32, GR32>;
+def LFER : UnaryAliasVRS<GR64, VR32>;
+def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
+def : Pat<(i32 (bitconvert (f32 VR32:$src))),
+ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
+
+// Floating-point values are stored in element 0 of the corresponding
+// vector register. Scalar to vector conversion is just a subreg and
+// scalar replication can just replicate element 0 of the vector register.
+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
+ SubRegIndex subreg> {
+ def : Pat<(vt (scalar_to_vector cls:$scalar)),
+ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
+ def : Pat<(vt (z_replicate cls:$scalar)),
+ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
+ subreg), 0)>;
+}
+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+
+// Match v2f64 insertions. The AddedComplexity counters the 3 added by
+// TableGen for the base register operand in VLVG-based integer insertions
+// and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
+ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), VR128:$vec, 1)>;
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
+ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), 0)>;
+}
+
+// We extract floating-point element X by replicating (for elements other
+// than 0) and then taking a high subreg. The AddedComplexity counters the
+// 3 added by TableGen for the base register operand in VLGV-based integer
+// extractions and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
+
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+}
+
+//===----------------------------------------------------------------------===//
+// String instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVector] in {
+ defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc,
+ v128b, v128b, 0, 0>;
+ defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc,
+ v128h, v128h, 1, 0>;
+ defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc,
+ v128f, v128f, 2, 0>;
+ defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc,
+ v128b, v128b, 0, 2>;
+ defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc,
+ v128h, v128h, 1, 2>;
+ defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc,
+ v128f, v128f, 2, 2>;
+
+ defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc,
+ v128b, v128b, 0, 0, 1>;
+ defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc,
+ v128h, v128h, 1, 0, 1>;
+ defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc,
+ v128f, v128f, 2, 0, 1>;
+ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc,
+ v128b, v128b, 0, 2, 3>;
+ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc,
+ v128h, v128h, 1, 2, 3>;
+ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc,
+ v128f, v128f, 2, 2, 3>;
+
+ defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc,
+ v128b, v128b, 0, 0, 1>;
+ defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc,
+ v128h, v128h, 1, 0, 1>;
+ defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc,
+ v128f, v128f, 2, 0, 1>;
+ defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
+ z_vfenez_cc, v128b, v128b, 0, 2, 3>;
+ defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
+ z_vfenez_cc, v128h, v128h, 1, 2, 3>;
+ defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
+ z_vfenez_cc, v128f, v128f, 2, 2, 3>;
+
+ defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc,
+ v128b, v128b, 0>;
+ defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc,
+ v128h, v128h, 1>;
+ defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc,
+ v128f, v128f, 2>;
+
+ defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+ z_vstrc_cc, v128b, v128b, 0, 0>;
+ defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
+ z_vstrc_cc, v128h, v128h, 1, 0>;
+ defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
+ z_vstrc_cc, v128f, v128f, 2, 0>;
+ defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
+ z_vstrcz_cc, v128b, v128b, 0, 2>;
+ defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
+ z_vstrcz_cc, v128h, v128h, 1, 2>;
+ defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
+ z_vstrcz_cc, v128f, v128f, 2, 2>;
+}
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
new file mode 100644
index 0000000000000..24165be29ae77
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -0,0 +1,143 @@
+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines multiple accesses to local-dynamic TLS variables so that
+// the TLS base address for the module is only fetched once per execution path
+// through the function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SystemZLDCleanup : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZLDCleanup(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
+
+ const char *getPassName() const override {
+ return "SystemZ Local Dynamic TLS Access Clean-up";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
+ MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
+
+ const SystemZInstrInfo *TII;
+ MachineFunction *MF;
+};
+
+char SystemZLDCleanup::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
+ return new SystemZLDCleanup(TM);
+}
+
+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ MF = &F;
+
+ SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there isn't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+}
+
+// Visit the dominator subtree rooted at Node in pre-order.
+// If TLSBaseAddrReg is non-null, then use that to replace any
+// TLS_LDCALL instructions. Otherwise, create the register
+// when the first such instruction is seen, and then use it
+// as we encounter more instructions.
+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
+ unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ switch (I->getOpcode()) {
+ case SystemZ::TLS_LDCALL:
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+
+ return Changed;
+}
+
+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
+// returning the new instruction.
+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ // Insert a Copy from TLSBaseAddrReg to R2.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SystemZ::R2D)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_LDCALL instruction.
+ I->eraseFromParent();
+
+ return Copy;
+}
+
+// Create a virtal register in *TLSBaseAddrReg, and populate it by
+// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
+ unsigned *TLSBaseAddrReg) {
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ // Insert a copy from R2 to TLSBaseAddrReg.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(SystemZ::R2D);
+
+ return Copy;
+}
+
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index df561e2d8002d..a1dcedab54e7d 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -11,6 +11,7 @@
#include "SystemZAsmPrinter.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -22,6 +23,8 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
return MCSymbolRefExpr::VK_None;
case SystemZII::MO_GOT:
return MCSymbolRefExpr::VK_GOT;
+ case SystemZII::MO_INDNTPOFF:
+ return MCSymbolRefExpr::VK_INDNTPOFF;
}
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
@@ -77,14 +80,14 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO,
MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
case MachineOperand::MO_Register:
- return MCOperand::CreateReg(MO.getReg());
+ return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::createImm(MO.getImm());
default: {
MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
- return MCOperand::CreateExpr(getExpr(MO, Kind));
+ return MCOperand::createExpr(getExpr(MO, Kind));
}
}
}
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 92c2ce7324a01..34fc36d6bf6c9 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
bool ManipulatesSP;
+ unsigned NumLocalDynamics;
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
: LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
- VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
+ NumLocalDynamics(0) {}
// Get and set the first call-saved GPR that should be saved and restored
// by this function. This is 0 if no GPRs need to be saved or restored.
@@ -61,6 +63,10 @@ public:
// e.g. through STACKSAVE or STACKRESTORE.
bool getManipulatesSP() const { return ManipulatesSP; }
void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
+
+ // Count number of local-dynamic TLS symbols used.
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 7be81dca727b1..9af90d492cf84 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -16,6 +16,11 @@ class ImmediateAsmOperand<string name>
let Name = name;
let RenderMethod = "addImmOperands";
}
+class ImmediateTLSAsmOperand<string name>
+ : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmTLSOperands";
+}
// Constructs both a DAG pattern and instruction operand for an immediate
// of type VT. PRED returns true if a node is acceptable and XFORM returns
@@ -34,6 +39,11 @@ class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> {
let PredicateMethod = "isImm";
let ParserMethod = "parsePCRel"##size;
}
+class PCRelTLSAsmOperand<string size>
+ : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
+ let PredicateMethod = "isImmTLS";
+ let ParserMethod = "parsePCRelTLS"##size;
+}
// Constructs an operand for a PC-relative address with address type VT.
// ASMOP is the associated asm operand.
@@ -41,6 +51,10 @@ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
let PrintMethod = "printPCRelOperand";
let ParserMatchClass = asmop;
}
+class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
+ let PrintMethod = "printPCRelTLSOperand";
+ let ParserMatchClass = asmop;
+}
// Constructs both a DAG pattern and instruction operand for a PC-relative
// address with address size VT. SELF is the name of the operand and
@@ -64,6 +78,22 @@ class AddressAsmOperand<string format, string bitsize, string dispsize,
let RenderMethod = "add"##format##"Operands";
}
+// Constructs an instruction operand for an addressing mode. FORMAT,
+// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
+// AddressAsmOperand. OPERANDS is a list of individual operands
+// (base register, displacement, etc.).
+class AddressOperand<string bitsize, string dispsize, string length,
+ string format, dag operands>
+ : Operand<!cast<ValueType>("i"##bitsize)> {
+ let PrintMethod = "print"##format##"Operand";
+ let EncoderMethod = "get"##format##dispsize##length##"Encoding";
+ let DecoderMethod =
+ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
+ let MIOperandInfo = operands;
+ let ParserMatchClass =
+ !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
+}
+
// Constructs both a DAG pattern and instruction operand for an addressing mode.
// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
// AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands
@@ -79,15 +109,7 @@ class AddressingMode<string seltype, string bitsize, string dispsize,
: ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
"select"##seltype##dispsize##suffix##length,
[add, sub, or, frameindex, z_adjdynalloc]>,
- Operand<!cast<ValueType>("i"##bitsize)> {
- let PrintMethod = "print"##format##"Operand";
- let EncoderMethod = "get"##format##dispsize##length##"Encoding";
- let DecoderMethod =
- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
- let MIOperandInfo = operands;
- let ParserMatchClass =
- !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
-}
+ AddressOperand<bitsize, dispsize, length, format, operands>;
// An addressing mode with a base and displacement but no index.
class BDMode<string type, string bitsize, string dispsize, string suffix>
@@ -111,6 +133,13 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
!cast<Immediate>("disp"##dispsize##"imm"##bitsize),
!cast<Immediate>("imm"##bitsize))>;
+// An addressing mode with a base, displacement and a vector index.
+class BDVMode<string bitsize, string dispsize>
+ : AddressOperand<bitsize, dispsize, "", "BDVAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("VR128"))>;
+
//===----------------------------------------------------------------------===//
// Extracting immediate operands from nodes
// These all create MVT::i64 nodes to ensure the value is not sign-extended
@@ -120,82 +149,105 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
// Bits 0-15 (counting from the lsb).
def LL16 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 16-31 (counting from the lsb).
def LH16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 32-47 (counting from the lsb).
def HL16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Bits 48-63 (counting from the lsb).
def HH16 : SDNodeXForm<imm, [{
uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Low 32 bits.
def LF32 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// High 32 bits.
def HF32 : SDNodeXForm<imm, [{
uint64_t Value = N->getZExtValue() >> 32;
- return CurDAG->getTargetConstant(Value, MVT::i64);
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
// Truncate an immediate to a 8-bit signed quantity.
def SIMM8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 8-bit unsigned quantity.
def UIMM8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit.
+def UIMM8EVEN : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N),
+ MVT::i64);
+}]>;
+
+// Truncate an immediate to a 12-bit unsigned quantity.
+def UIMM12 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 16-bit signed quantity.
def SIMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 16-bit unsigned quantity.
def UIMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 32-bit signed quantity.
def SIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Truncate an immediate to a 32-bit unsigned quantity.
def UIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
// Negate and then truncate an immediate to a 32-bit unsigned quantity.
def NEGIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64);
+ return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
}]>;
//===----------------------------------------------------------------------===//
// Immediate asm operands.
//===----------------------------------------------------------------------===//
+def U1Imm : ImmediateAsmOperand<"U1Imm">;
+def U2Imm : ImmediateAsmOperand<"U2Imm">;
+def U3Imm : ImmediateAsmOperand<"U3Imm">;
def U4Imm : ImmediateAsmOperand<"U4Imm">;
def U6Imm : ImmediateAsmOperand<"U6Imm">;
def S8Imm : ImmediateAsmOperand<"S8Imm">;
def U8Imm : ImmediateAsmOperand<"U8Imm">;
+def U12Imm : ImmediateAsmOperand<"U12Imm">;
def S16Imm : ImmediateAsmOperand<"S16Imm">;
def U16Imm : ImmediateAsmOperand<"U16Imm">;
def S32Imm : ImmediateAsmOperand<"S32Imm">;
@@ -226,10 +278,28 @@ def imm32lh16c : Immediate<i32, [{
}], LH16, "U16Imm">;
// Short immediates
+def imm32zx1 : Immediate<i32, [{
+ return isUInt<1>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U1Imm">;
+
+def imm32zx2 : Immediate<i32, [{
+ return isUInt<2>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U2Imm">;
+
+def imm32zx3 : Immediate<i32, [{
+ return isUInt<3>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U3Imm">;
+
def imm32zx4 : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U4Imm">;
+// Note: this enforces an even value during code generation only.
+// When used from the assembler, any 4-bit value is allowed.
+def imm32zx4even : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], UIMM8EVEN, "U4Imm">;
+
def imm32zx6 : Immediate<i32, [{
return isUInt<6>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U6Imm">;
@@ -244,6 +314,10 @@ def imm32zx8 : Immediate<i32, [{
def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+def imm32zx12 : Immediate<i32, [{
+ return isUInt<12>(N->getZExtValue());
+}], UIMM12, "U12Imm">;
+
def imm32sx16 : Immediate<i32, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
@@ -370,6 +444,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
// PC-relative asm operands.
def PCRel16 : PCRelAsmOperand<"16">;
def PCRel32 : PCRelAsmOperand<"32">;
+def PCRelTLS16 : PCRelTLSAsmOperand<"16">;
+def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
// PC-relative offsets of a basic block. The offset is sign-extended
// and multiplied by 2.
@@ -382,6 +458,20 @@ def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
let DecoderMethod = "decodePC32DBLOperand";
}
+// Variants of brtarget16/32 with an optional additional TLS symbol.
+// These are used to annotate calls to __tls_get_offset.
+def tlssym : Operand<i64> { }
+def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
+ let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
+ let EncoderMethod = "getPC16DBLTLSEncoding";
+ let DecoderMethod = "decodePC16DBLOperand";
+}
+def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
+ let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
+ let EncoderMethod = "getPC32DBLTLSEncoding";
+ let DecoderMethod = "decodePC32DBLOperand";
+}
+
// A PC-relative offset of a global value. The offset is sign-extended
// and multiplied by 2.
def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
@@ -408,6 +498,7 @@ def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
+def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
// DAG patterns and operands for addressing modes. Each mode has
// the form <type><range><group>[<len>] where:
@@ -420,6 +511,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
// laaddr : like bdxaddr, but used for Load Address operations
// dynalloc : base + displacement + index + ADJDYNALLOC
// bdladdr : base + displacement with a length field
+// bdvaddr : base + displacement with a vector index
//
// <range> is one of:
// 12 : the displacement is an unsigned 12-bit value
@@ -452,6 +544,7 @@ def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
+def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// Miscellaneous
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index c70e662db4270..3c95a1e11b45a 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
+def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
+ [SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZReplicate : SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>;
+def SDT_ZVecUnaryConv : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisVec<1>]>;
+def SDT_ZVecUnary : SDTypeProfile<1, 1,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>]>;
+def SDT_ZVecBinary : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_ZVecBinaryInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZVecBinaryConv : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZRotateMask : SDTypeProfile<1, 2,
+ [SDTCisVec<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZJoinDwords : SDTypeProfile<1, 2,
+ [SDTCisVT<0, v2i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZVecTernary : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4,
+ [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisVT<4, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -90,6 +148,7 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
[SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
SDNPOutGlue]>;
+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
@@ -100,6 +159,12 @@ def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
SDT_ZWrapOffset, []>;
@@ -114,6 +179,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
@@ -123,6 +189,80 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
[SDNPHasChain, SDNPMayStore]>;
+// Defined because the index is an i32 rather than a pointer.
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
+ SDT_ZInsertVectorElt>;
+def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+ SDT_ZExtractVectorElt>;
+def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
+def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
+def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
+def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
+def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
+def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
+def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
+def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
+def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
+ SDT_ZVecTernaryInt>;
+def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
+def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
+def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
+def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
+def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
+def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
+def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
+def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
+def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
+ [SDNPOutGlue]>;
+def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
+def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
+ [SDNPOutGlue]>;
+def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
+ [SDNPOutGlue]>;
+def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
+ [SDNPOutGlue]>;
+def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
+ [SDNPOutGlue]>;
+def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
+ SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
+def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
+ [SDNPOutGlue]>;
+
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
: SDNode<"SystemZISD::"##name, profile,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
@@ -172,6 +312,19 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
+def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
+def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
+def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
@@ -195,11 +348,21 @@ def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
+// Match extensions of an i32 to an i64, followed by an in-register sign
+// extension from a sub-i32 value.
+def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
+def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
+
// Register zero-extend operations. Sub-32-bit values are represented as i32s.
def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+// Match extensions of an i32 to an i64, followed by an AND of the low
+// i8 or i16 part.
+def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
+def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
+
// Typed floating-point loads.
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
@@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src),
def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+// Integer multiply-and-add
+def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (add (mul node:$src1, node:$src2), node:$src3)>;
+
+// Fused multiply-subtract, using the natural operand order.
+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, (fneg node:$src3))>;
+
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -383,3 +554,110 @@ class loadu<SDPatternOperator operator, SDPatternOperator load = load>
class storeu<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$value, node:$addr),
(store (operator node:$value), node:$addr)>;
+
+// Vector representation of all-zeros and all-ones.
+def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
+def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
+
+// Load a scalar and replicate it in all elements of a vector.
+class z_replicate_load<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$addr),
+ (z_replicate (scalartype (load node:$addr)))>;
+def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
+def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
+def z_replicate_loadi32 : z_replicate_load<i32, load>;
+def z_replicate_loadi64 : z_replicate_load<i64, load>;
+def z_replicate_loadf32 : z_replicate_load<f32, load>;
+def z_replicate_loadf64 : z_replicate_load<f64, load>;
+
+// Load a scalar and insert it into a single element of a vector.
+class z_vle<ValueType scalartype, SDPatternOperator load>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (z_vector_insert node:$vec, (scalartype (load node:$addr)),
+ node:$index)>;
+def z_vlei8 : z_vle<i32, anyextloadi8>;
+def z_vlei16 : z_vle<i32, anyextloadi16>;
+def z_vlei32 : z_vle<i32, load>;
+def z_vlei64 : z_vle<i64, load>;
+def z_vlef32 : z_vle<f32, load>;
+def z_vlef64 : z_vle<f64, load>;
+
+// Load a scalar and insert it into the low element of the high i64 of a
+// zeroed vector.
+class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
+ : PatFrag<(ops node:$addr),
+ (z_vector_insert (z_vzero),
+ (scalartype (load node:$addr)), (i32 index))>;
+def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
+def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
+def z_vllezi32 : z_vllez<i32, load, 1>;
+def z_vllezi64 : PatFrag<(ops node:$addr),
+ (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+// We use high merges to form a v4f32 from four f32s. Propagating zero
+// into all elements but index 1 gives this expression.
+def z_vllezf32 : PatFrag<(ops node:$addr),
+ (bitconvert
+ (z_merge_high
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
+ (v2i64 (z_vzero))))>;
+def z_vllezf64 : PatFrag<(ops node:$addr),
+ (z_merge_high
+ (scalar_to_vector (f64 (load node:$addr))),
+ (z_vzero))>;
+
+// Store one element of a vector.
+class z_vste<ValueType scalartype, SDPatternOperator store>
+ : PatFrag<(ops node:$vec, node:$addr, node:$index),
+ (store (scalartype (z_vector_extract node:$vec, node:$index)),
+ node:$addr)>;
+def z_vstei8 : z_vste<i32, truncstorei8>;
+def z_vstei16 : z_vste<i32, truncstorei16>;
+def z_vstei32 : z_vste<i32, store>;
+def z_vstei64 : z_vste<i64, store>;
+def z_vstef32 : z_vste<f32, store>;
+def z_vstef64 : z_vste<f64, store>;
+
+// Arithmetic negation on vectors.
+def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+
+// Bitwise negation on vectors.
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+
+// Signed "integer greater than zero" on vectors.
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+
+// Signed "integer less than zero" on vectors.
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+
+// Integer absolute on vectors.
+class z_viabs<int shift>
+ : PatFrag<(ops node:$src),
+ (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
+ (z_vsra_by_scalar node:$src, (i32 shift)))>;
+def z_viabs8 : z_viabs<7>;
+def z_viabs16 : z_viabs<15>;
+def z_viabs32 : z_viabs<31>;
+def z_viabs64 : z_viabs<63>;
+
+// Sign-extend the i64 elements of a vector.
+class z_vse<int shift>
+ : PatFrag<(ops node:$src),
+ (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>;
+def z_vsei8 : z_vse<56>;
+def z_vsei16 : z_vse<48>;
+def z_vsei32 : z_vse<32>;
+
+// ...and again with the extensions being done on individual i64 scalars.
+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
+ : PatFrag<(ops node:$src),
+ (z_join_dwords
+ (operator (z_vector_extract node:$src, index1)),
+ (operator (z_vector_extract node:$src, index2)))>;
+def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>;
+def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
+def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index e307f8a888eed..16a7ed784d709 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
// The sign of the zero makes no difference.
def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
}
+
+// Use INSN for performing binary operation OPERATION of type VT
+// on registers of class CLS.
+class BinaryRRWithType<Instruction insn, RegisterOperand cls,
+ SDPatternOperator operator, ValueType vt>
+ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
+
+// Use INSN to perform conversion operation OPERATOR, with the input being
+// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions
+// and 0 to allow them. MODE is the rounding mode to use.
+class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
+ TypedReg tr2, bits<3> suppress, bits<4> mode>
+ : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
+ (insn tr2.op:$vec, suppress, mode)>;
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index e6b58f17b0e68..32fbe5ae9ef91 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
class SystemZFeature<string extname, string intname, string desc>
- : Predicate<"Subtarget.has"##intname##"()">,
+ : Predicate<"Subtarget->has"##intname##"()">,
AssemblerPredicate<"Feature"##intname, extname>,
SubtargetFeature<extname, "Has"##intname, "true", desc>;
class SystemZMissingFeature<string intname>
- : Predicate<"!Subtarget.has"##intname##"()">;
+ : Predicate<"!Subtarget->has"##intname##"()">;
def FeatureDistinctOps : SystemZFeature<
"distinct-ops", "DistinctOps",
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZFeature<
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
+def FeatureVector : SystemZFeature<
+ "vector", "Vector",
+ "Assume that the vectory facility is installed"
+>;
+def FeatureNoVector : SystemZMissingFeature<"Vector">;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
+def : Processor<"z13", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureTransactionalExecution, FeatureProcessorAssist,
+ FeatureVector]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 64f5eebf37c14..7cabea962e911 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -28,7 +28,8 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_SystemZ_RegMask;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 212fe91f38abb..a0db5a9c188fb 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -43,9 +43,9 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
- override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 47ac20dae78ab..85aa0a62cc767 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -25,20 +25,24 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
+def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
+def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
}
-// Define a register class that contains values of type TYPE and an
+// Define a register class that contains values of types TYPES and an
// associated operand called NAME. SIZE is the size and alignment
// of the registers and REGLIST is the list of individual registers.
-multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> {
+multiclass SystemZRegClass<string name, list<ValueType> types, int size,
+ dag regList> {
def AsmOperand : AsmOperandClass {
let Name = name;
let ParserMethod = "parse"##name;
let RenderMethod = "addRegOperands";
}
- def Bit : RegisterClass<"SystemZ", [type], size, regList> {
+ def Bit : RegisterClass<"SystemZ", types, size, regList> {
let Size = size;
}
def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
@@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
/// together with R14 and R15 in one prolog instruction.
-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5),
- (sequence "R%uL", 15, 6))>;
-defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
- (sequence "R%uH", 15, 6))>;
-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))>;
+defm GR32 : SystemZRegClass<"GR32", [i32], 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", [i32], 32,
+ (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", [i64], 64,
+ (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
// Combine the low and high GR32s into a single class. This can only be
// used for virtual registers if the high-word facility is available.
-defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
+defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
(add (sequence "R%uL", 0, 5),
(sequence "R%uH", 0, 5),
R15L, R15H, R14L, R14H, R13L, R13H,
@@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
-defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
- R12Q, R10Q, R8Q, R6Q,
- R14Q)>;
+defm GR128 : SystemZRegClass<"GR128", [untyped], 128,
+ (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>;
// Base and index registers. Everything except R0, which in an address
// context evaluates as 0.
-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
-defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
+defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>;
+defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>;
// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
// of a GR128.
-defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
+defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>;
//===----------------------------------------------------------------------===//
// Floating-point registers
@@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>;
def F13Dwarf : DwarfMapping<30>;
def F15Dwarf : DwarfMapping<31>;
-// Lower 32 bits of one of the 16 64-bit floating-point registers
+def F16Dwarf : DwarfMapping<68>;
+def F18Dwarf : DwarfMapping<69>;
+def F20Dwarf : DwarfMapping<70>;
+def F22Dwarf : DwarfMapping<71>;
+
+def F17Dwarf : DwarfMapping<72>;
+def F19Dwarf : DwarfMapping<73>;
+def F21Dwarf : DwarfMapping<74>;
+def F23Dwarf : DwarfMapping<75>;
+
+def F24Dwarf : DwarfMapping<76>;
+def F26Dwarf : DwarfMapping<77>;
+def F28Dwarf : DwarfMapping<78>;
+def F30Dwarf : DwarfMapping<79>;
+
+def F25Dwarf : DwarfMapping<80>;
+def F27Dwarf : DwarfMapping<81>;
+def F29Dwarf : DwarfMapping<82>;
+def F31Dwarf : DwarfMapping<83>;
+
+// Upper 32 bits of one of the floating-point registers
class FPR32<bits<16> num, string n> : SystemZReg<n> {
let HWEncoding = num;
}
-// One of the 16 64-bit floating-point registers
-class FPR64<bits<16> num, string n, FPR32 low>
- : SystemZRegWithSubregs<n, [low]> {
+// One of the floating-point registers.
+class FPR64<bits<16> num, string n, FPR32 high>
+ : SystemZRegWithSubregs<n, [high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_h32];
+ let SubRegIndices = [subreg_r32];
}
// 8 pairs of FPR64s, with a one-register gap inbetween.
@@ -161,12 +187,17 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
let SubRegIndices = [subreg_l64, subreg_h64];
}
-// Floating-point registers
+// Floating-point registers. Registers 16-31 require the vector facility.
foreach I = 0-15 in {
def F#I#S : FPR32<I, "f"#I>;
def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
}
+foreach I = 16-31 in {
+ def F#I#S : FPR32<I, "v"#I>;
+ def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
@@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
// There's no store-multiple instruction for FPRs, so we're not fussy
// about the order in which call-saved registers are allocated.
-defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>;
-defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>;
-defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
- F8Q, F9Q, F12Q, F13Q)>;
+defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>;
+defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>;
+defm FP128 : SystemZRegClass<"FP128", [f128], 128,
+ (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>;
+
+//===----------------------------------------------------------------------===//
+// Vector registers
+//===----------------------------------------------------------------------===//
+
+// A full 128-bit vector register, with an FPR64 as its high part.
+class VR128<bits<16> num, string n, FPR64 high>
+ : SystemZRegWithSubregs<n, [high]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_r64];
+}
+
+// Full vector registers.
+foreach I = 0-31 in {
+ def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>,
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
+}
+
+// Class used to store 32-bit values in the first element of a vector
+// register. f32 scalars are used for the WLEDB and WLDEB instructions.
+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32,
+ (add (sequence "F%uS", 0, 7),
+ (sequence "F%uS", 16, 31),
+ (sequence "F%uS", 8, 15))>;
+
+// Class used to store 64-bit values in the upper half of a vector register.
+// The vector facility also includes scalar f64 instructions that operate
+// on the full vector register set.
+defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64,
+ (add (sequence "F%uD", 0, 7),
+ (sequence "F%uD", 16, 31),
+ (sequence "F%uD", 8, 15))>;
+
+// The subset of vector registers that can be used for floating-point
+// operations too.
+defm VF128 : SystemZRegClass<"VF128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (sequence "V%u", 0, 15)>;
+
+// All vector registers.
+defm VR128 : SystemZRegClass<"VR128",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+ (add (sequence "V%u", 0, 7),
+ (sequence "V%u", 16, 31),
+ (sequence "V%u", 8, 15))>;
+
+// Attaches a ValueType to a register operand, to make the instruction
+// definitions easier.
+class TypedReg<ValueType vtin, RegisterOperand opin> {
+ ValueType vt = vtin;
+ RegisterOperand op = opin;
+}
+
+def v32eb : TypedReg<f32, VR32>;
+def v64g : TypedReg<i64, VR64>;
+def v64db : TypedReg<f64, VR64>;
+def v128b : TypedReg<v16i8, VR128>;
+def v128h : TypedReg<v8i16, VR128>;
+def v128f : TypedReg<v4i32, VR128>;
+def v128g : TypedReg<v2i64, VR128>;
+def v128q : TypedReg<v16i8, VR128>;
+def v128eb : TypedReg<v4f32, VR128>;
+def v128db : TypedReg<v2f64, VR128>;
+def v128any : TypedReg<untyped, VR128>;
//===----------------------------------------------------------------------===//
// Other registers
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index a3cba64b9ed20..e7e0268dbb8a1 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -46,10 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
// number of straight-line MVCs as 6 * 256 - 1.
if (Size > 6 * 256)
return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, PtrVT),
- DAG.getConstant(Size / 256, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT));
}
SDValue SystemZSelectionDAGInfo::
@@ -78,7 +78,8 @@ static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
for (unsigned I = 1; I < Size; ++I)
StoreVal |= ByteVal << (I * 8);
return DAG.getStore(Chain, DL,
- DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)),
+ DAG.getConstant(StoreVal, DL,
+ MVT::getIntegerVT(Size * 8)),
Dst, DstPtrInfo, false, false, Align);
}
@@ -103,7 +104,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// we can move at most 2 halfwords.
uint64_t ByteVal = CByte->getZExtValue();
if (ByteVal == 0 || ByteVal == 255 ?
- Bytes <= 16 && CountPopulation_64(Bytes) <= 2 :
+ Bytes <= 16 && countPopulation(Bytes) <= 2 :
Bytes <= 4) {
unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes);
unsigned Size2 = Bytes - Size1;
@@ -112,7 +113,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (Size2 == 0)
return Chain1;
Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(Size1, PtrVT));
+ DAG.getConstant(Size1, DL, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
std::min(Align, Size1), DstPtrInfo);
@@ -126,7 +127,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (Bytes == 1)
return Chain1;
SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(1, PtrVT));
+ DAG.getConstant(1, DL, PtrVT));
SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
DstPtrInfo.getWithOffset(1),
false, false, 1);
@@ -146,7 +147,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
false, false, Align);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
- DAG.getConstant(1, PtrVT));
+ DAG.getConstant(1, DL, PtrVT));
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
Chain, DstPlus1, Dst, Bytes - 1);
}
@@ -169,10 +170,10 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// needs 2 branches, whereas a straight-line sequence would need 3 or more.
if (Size > 3 * 256)
return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, PtrVT),
- DAG.getConstant(Size / 256, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT),
+ DAG.getConstant(Size / 256, DL, PtrVT));
return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, PtrVT));
+ DAG.getConstant(Size, DL, PtrVT));
}
// Convert the current CC value into an integer that is 0 if CC == 0,
@@ -182,9 +183,9 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
- DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
- DAG.getConstant(31, MVT::i32));
+ DAG.getConstant(31, DL, MVT::i32));
return ROTL;
}
@@ -213,7 +214,7 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
- DAG.getConstant(255, MVT::i32));
+ DAG.getConstant(255, DL, MVT::i32));
SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
Limit, Src, Char);
@@ -222,12 +223,10 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
// Now select between End and null, depending on whether the character
// was found.
- SmallVector<SDValue, 5> Ops;
- Ops.push_back(End);
- Ops.push_back(DAG.getConstant(0, PtrVT));
- Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
- Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
- Ops.push_back(Glue);
+ SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
+ DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
+ DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
+ Glue};
VTs = DAG.getVTList(PtrVT, MVT::Glue);
End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
return std::make_pair(End, Chain);
@@ -240,7 +239,7 @@ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
}
@@ -251,7 +250,7 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
MachinePointerInfo Op2PtrInfo) const {
SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
Chain = Unused.getValue(1);
SDValue Glue = Chain.getValue(2);
return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
@@ -268,7 +267,7 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
EVT PtrVT = Src.getValueType();
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
- Limit, Src, DAG.getConstant(0, MVT::i32));
+ Limit, Src, DAG.getConstant(0, DL, MVT::i32));
Chain = End.getValue(1);
SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
return std::make_pair(Len, Chain);
@@ -278,7 +277,7 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src, MachinePointerInfo SrcPtrInfo) const {
EVT PtrVT = Src.getValueType();
- return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT));
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index ec7a8c40d18a9..d1a17c5500d65 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -15,6 +15,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@@ -36,6 +37,10 @@ public:
private:
bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
unsigned LLIxL, unsigned LLIxH);
+ bool shortenOn0(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn01(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
return false;
}
+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0 and 1 have a
+// 4-bit encoding.
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
+// 4-bit encoding and if operands 0 and 1 are tied.
+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
+ SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
+ MI.setDesc(TII->get(Opcode));
+ return true;
+ }
+ return false;
+}
+
+// MI is a vector-style conversion instruction with the operand order:
+// destination, source, exact-suppress, rounding-mode. If both registers
+// have a 4-bit encoding then change it to Opcode, which has operand order:
+// destination, rouding-mode, source, exact-suppress.
+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
+ MachineOperand Dest(MI.getOperand(0));
+ MachineOperand Src(MI.getOperand(1));
+ MachineOperand Suppress(MI.getOperand(2));
+ MachineOperand Mode(MI.getOperand(3));
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.RemoveOperand(1);
+ MI.RemoveOperand(0);
+ MI.setDesc(TII->get(Opcode));
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addOperand(Dest)
+ .addOperand(Mode)
+ .addOperand(Src)
+ .addOperand(Suppress);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SystemZ::IILF)
+ switch (MI.getOpcode()) {
+ case SystemZ::IILF:
Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
SystemZ::LLILH);
- else if (Opcode == SystemZ::IIHF)
+ break;
+
+ case SystemZ::IIHF:
Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
SystemZ::LLIHH);
+ break;
+
+ case SystemZ::WFADB:
+ Changed |= shortenOn001(MI, SystemZ::ADBR);
+ break;
+
+ case SystemZ::WFDDB:
+ Changed |= shortenOn001(MI, SystemZ::DDBR);
+ break;
+
+ case SystemZ::WFIDB:
+ Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
+ break;
+
+ case SystemZ::WLDEB:
+ Changed |= shortenOn01(MI, SystemZ::LDEBR);
+ break;
+
+ case SystemZ::WLEDB:
+ Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
+ break;
+
+ case SystemZ::WFMDB:
+ Changed |= shortenOn001(MI, SystemZ::MDBR);
+ break;
+
+ case SystemZ::WFLCDB:
+ Changed |= shortenOn01(MI, SystemZ::LCDBR);
+ break;
+
+ case SystemZ::WFLNDB:
+ Changed |= shortenOn01(MI, SystemZ::LNDBR);
+ break;
+
+ case SystemZ::WFLPDB:
+ Changed |= shortenOn01(MI, SystemZ::LPDBR);
+ break;
+
+ case SystemZ::WFSQDB:
+ Changed |= shortenOn01(MI, SystemZ::SQDBR);
+ break;
+
+ case SystemZ::WFSDB:
+ Changed |= shortenOn001(MI, SystemZ::SDBR);
+ break;
+
+ case SystemZ::WFCDB:
+ Changed |= shortenOn01(MI, SystemZ::CDBR);
+ break;
+
+ case SystemZ::VL32:
+ // For z13 we prefer LDE over LE to avoid partial register dependencies.
+ Changed |= shortenOn0(MI, SystemZ::LDE32);
+ break;
+
+ case SystemZ::VST32:
+ Changed |= shortenOn0(MI, SystemZ::STE);
+ break;
+
+ case SystemZ::VL64:
+ Changed |= shortenOn0(MI, SystemZ::LD);
+ break;
+
+ case SystemZ::VST64:
+ Changed |= shortenOn0(MI, SystemZ::STD);
+ break;
+ }
+
unsigned UsedLow = 0;
unsigned UsedHigh = 0;
for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index e160bc86f2251..05aede3deb4f3 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,7 +10,6 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Host.h"
using namespace llvm;
@@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__linux__) && defined(__s390x__)
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
return *this;
@@ -43,14 +38,12 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
- TargetTriple(TT),
- // Make sure that global data has at least 16 bits of alignment by
- // default, so that we can refer to it using LARL. We don't have any
- // special requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- TSInfo(DL), FrameLowering() {}
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
+ HasVector(false),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index f8815524e0f32..9a1f593f52657 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -38,12 +38,16 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
+ bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
+ bool HasVector;
private:
Triple TargetTriple;
- const DataLayout DL;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
SystemZSelectionDAGInfo TSInfo;
@@ -59,7 +63,6 @@ public:
return &FrameLowering;
}
const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -88,12 +91,29 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
// Return true if the target has interlocked-access facility 1.
bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
+ // Return true if the target has the miscellaneous-extensions facility.
+ bool hasMiscellaneousExtensions() const {
+ return HasMiscellaneousExtensions;
+ }
+
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
+ // Return true if the target has the vector facility.
+ bool hasVector() const { return HasVector; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index a210074484e7f..a34cdaf8030d2 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "SystemZTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -20,12 +21,71 @@ extern "C" void LLVMInitializeSystemZTarget() {
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
}
+// Determine whether we use the vector ABI.
+static bool UsesVectorABI(StringRef CPU, StringRef FS) {
+ // We use the vector ABI whenever the vector facility is avaiable.
+ // This is the case by default if CPU is z13 or later, and can be
+ // overridden via "[+-]vector" feature string elements.
+ bool VectorABI = true;
+ if (CPU.empty() || CPU == "generic" ||
+ CPU == "z10" || CPU == "z196" || CPU == "zEC12")
+ VectorABI = false;
+
+ SmallVector<StringRef, 3> Features;
+ FS.split(Features, ",", -1, false /* KeepEmpty */);
+ for (auto &Feature : Features) {
+ if (Feature == "vector" || Feature == "+vector")
+ VectorABI = true;
+ if (Feature == "-vector")
+ VectorABI = false;
+ }
+
+ return VectorABI;
+}
+
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ const Triple Triple(TT);
+ bool VectorABI = UsesVectorABI(CPU, FS);
+ std::string Ret = "";
+
+ // Big endian.
+ Ret += "E";
+
+ // Data mangling.
+ Ret += DataLayout::getManglingComponent(Triple);
+
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ Ret += "-i1:8:16-i8:8:16";
+
+ // 64-bit integers are naturally aligned.
+ Ret += "-i64:64";
+
+ // 128-bit floats are aligned only to 64 bits.
+ Ret += "-f128:64";
+
+ // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
+ if (VectorABI)
+ Ret += "-v128:64";
+
+ // We prefer 16 bits of aligned for all globals; see above.
+ Ret += "-a:8:16";
+
+ // Integer registers are 32 or 64 bits.
+ Ret += "-n32:64";
+
+ return Ret;
+}
+
SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS),
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
@@ -57,6 +117,10 @@ void SystemZPassConfig::addIRPasses() {
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
+
return false;
}
@@ -100,3 +164,9 @@ void SystemZPassConfig::addPreEmitPass() {
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
+
+TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
+ });
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 9fae5e43e754d..5ded07c1efb21 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -33,12 +33,13 @@ public:
CodeGenOpt::Level OL);
~SystemZTargetMachine() override;
- // Override TargetMachine.
- const SystemZSubtarget *getSubtargetImpl() const override {
+ const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
new file mode 100644
index 0000000000000..5a87df1976c34
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -0,0 +1,258 @@
+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a TargetTransformInfo analysis pass specific to the
+// SystemZ target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemztti"
+
+//===----------------------------------------------------------------------===//
+//
+// SystemZ cost model.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ // Constants loaded via lgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llihf:
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Basic;
+
+ return 2 * TTI::TCC_Basic;
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
+
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::Store:
+ if (Idx == 0 && Imm.getBitWidth() <= 64) {
+ // Any 8-bit immediate store can by implemented via mvi.
+ if (BitSize == 8)
+ return TTI::TCC_Free;
+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::ICmp:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Comparisons against signed 32-bit immediates implemented via cgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Or their negation, by swapping addition vs. subtraction.
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Mul:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use msgfi to multiply by 32-bit signed immediates.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Masks supported by oilf/xilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Masks supported by oihf/xihf.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::And:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Any 32-bit AND operation can by implemented via nilf.
+ if (BitSize <= 32)
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilf.
+ if (isUInt<32>(~Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilh.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+ return TTI::TCC_Free;
+ // Some 64-bit AND operations can be implemented via risbg.
+ const SystemZInstrInfo *TII = ST->getInstrInfo();
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1)
+ return TTI::TCC_Free;
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // These get expanded to include a normal addition/subtraction.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ if (isUInt<32>(-Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // These get expanded to include a normal multiplication.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
+unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
+ if (!Vector)
+ // Discount the stack pointer. Also leave out %r0, since it can't
+ // be used in an address.
+ return 14;
+ if (ST->hasVector())
+ return 32;
+ return 0;
+}
+
+unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
+ if (!Vector)
+ return 64;
+ if (ST->hasVector())
+ return 128;
+ return 0;
+}
+
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
new file mode 100644
index 0000000000000..e9cabe968eea2
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -0,0 +1,78 @@
+//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
+ typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const SystemZSubtarget *ST;
+ const SystemZTargetLowering *TLI;
+
+ const SystemZSubtarget *getST() const { return ST; }
+ const SystemZTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ SystemZTTIImpl(const SystemZTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ SystemZTTIImpl(SystemZTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif