diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 18:44:32 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-05-27 18:44:32 +0000 |
| commit | 5a5ac124e1efaf208671f01c46edb15f29ed2a0b (patch) | |
| tree | a6140557876943cdd800ee997c9317283394b22c /lib/Target/SystemZ | |
| parent | f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (diff) | |
Notes
Diffstat (limited to 'lib/Target/SystemZ')
50 files changed, 7042 insertions, 551 deletions
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index cb528db9db51c..b721def54e126 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -39,13 +39,17 @@ enum RegisterKind { ADDR64Reg, FP32Reg, FP64Reg, - FP128Reg + FP128Reg, + VR32Reg, + VR64Reg, + VR128Reg }; enum MemoryKind { BDMem, BDXMem, - BDLMem + BDLMem, + BDVMem }; class SystemZOperand : public MCParsedAsmOperand { @@ -57,6 +61,7 @@ private: KindReg, KindAccessReg, KindImm, + KindImmTLS, KindMem }; @@ -84,34 +89,42 @@ private: }; // Base + Disp + Index, where Base and Index are LLVM registers or 0. - // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). - // Length is the operand length for D(L,B)-style operands, otherwise - // it is null. + // MemKind says what type of memory this is and RegKind says what type + // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand + // length for D(L,B)-style operands, otherwise it is null. struct MemOp { - unsigned Base : 8; - unsigned Index : 8; - unsigned RegKind : 8; - unsigned Unused : 8; + unsigned Base : 12; + unsigned Index : 12; + unsigned MemKind : 4; + unsigned RegKind : 4; const MCExpr *Disp; const MCExpr *Length; }; + // Imm is an immediate operand, and Sym is an optional TLS symbol + // for use with a __tls_get_offset marker relocation. + struct ImmTLSOp { + const MCExpr *Imm; + const MCExpr *Sym; + }; + union { TokenOp Token; RegOp Reg; unsigned AccessReg; const MCExpr *Imm; + ImmTLSOp ImmTLS; MemOp Mem; }; void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. 
if (!Expr) - Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::createImm(0)); else if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + Inst.addOperand(MCOperand::createImm(CE->getValue())); else - Inst.addOperand(MCOperand::CreateExpr(Expr)); + Inst.addOperand(MCOperand::createExpr(Expr)); } public: @@ -149,10 +162,11 @@ public: return Op; } static std::unique_ptr<SystemZOperand> - createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, - unsigned Index, const MCExpr *Length, SMLoc StartLoc, - SMLoc EndLoc) { + createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, + const MCExpr *Disp, unsigned Index, const MCExpr *Length, + SMLoc StartLoc, SMLoc EndLoc) { auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc); + Op->Mem.MemKind = MemKind; Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; Op->Mem.Index = Index; @@ -160,6 +174,14 @@ public: Op->Mem.Length = Length; return Op; } + static std::unique_ptr<SystemZOperand> + createImmTLS(const MCExpr *Imm, const MCExpr *Sym, + SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc); + Op->ImmTLS.Imm = Imm; + Op->ImmTLS.Sym = Sym; + return Op; + } // Token operands bool isToken() const override { @@ -200,24 +222,40 @@ public: return Imm; } + // Immediate operands with optional TLS symbol. + bool isImmTLS() const { + return Kind == KindImmTLS; + } + // Memory operands. bool isMem() const override { return Kind == KindMem; } - bool isMem(RegisterKind RegKind, MemoryKind MemKind) const { + bool isMem(MemoryKind MemKind) const { return (Kind == KindMem && - Mem.RegKind == RegKind && - (MemKind == BDXMem || !Mem.Index) && - (MemKind == BDLMem) == (Mem.Length != nullptr)); + (Mem.MemKind == MemKind || + // A BDMem can be treated as a BDXMem in which the index + // register field is 0. 
+ (Mem.MemKind == BDMem && MemKind == BDXMem))); + } + bool isMem(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind) && Mem.RegKind == RegKind; } - bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { - return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); + bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff); } - bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const { - return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287); + bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { + return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287); } bool isMemDisp12Len8(RegisterKind RegKind) const { - return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100); + return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100); + } + void addBDVAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(isMem(BDVMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::createReg(Mem.Index)); } // Override MCParsedAsmOperand. @@ -229,12 +267,12 @@ public: // to an instruction. 
void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands"); - Inst.addOperand(MCOperand::CreateReg(getReg())); + Inst.addOperand(MCOperand::createReg(getReg())); } void addAccessRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands"); assert(Kind == KindAccessReg && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateImm(AccessReg)); + Inst.addOperand(MCOperand::createImm(AccessReg)); } void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands"); @@ -242,24 +280,31 @@ public: } void addBDAddrOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands"); - assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + assert(isMem(BDMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); addExpr(Inst, Mem.Disp); } void addBDXAddrOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands"); - assert(Kind == KindMem && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + assert(isMem(BDXMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); addExpr(Inst, Mem.Disp); - Inst.addOperand(MCOperand::CreateReg(Mem.Index)); + Inst.addOperand(MCOperand::createReg(Mem.Index)); } void addBDLAddrOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands"); - assert(Kind == KindMem && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + assert(isMem(BDLMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::createReg(Mem.Base)); addExpr(Inst, Mem.Disp); addExpr(Inst, Mem.Length); } + void addImmTLSOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(Kind == KindImmTLS && "Invalid operand type"); + addExpr(Inst, ImmTLS.Imm); + if (ImmTLS.Sym) + 
addExpr(Inst, ImmTLS.Sym); + } // Used by the TableGen code to check for particular operand types. bool isGR32() const { return isReg(GR32Reg); } @@ -273,17 +318,26 @@ public: bool isFP32() const { return isReg(FP32Reg); } bool isFP64() const { return isReg(FP64Reg); } bool isFP128() const { return isReg(FP128Reg); } - bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); } - bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); } - bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); } - bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); } - bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); } - bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); } + bool isVR32() const { return isReg(VR32Reg); } + bool isVR64() const { return isReg(VR64Reg); } + bool isVF128() const { return false; } + bool isVR128() const { return isReg(VR128Reg); } + bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } + bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } + bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); } + bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } + bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } + bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } + bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } + bool isU1Imm() const { return isImm(0, 1); } + bool isU2Imm() const { return isImm(0, 3); } + bool isU3Imm() const { return isImm(0, 7); } bool isU4Imm() const { return isImm(0, 15); } bool isU6Imm() const { return isImm(0, 63); } bool isU8Imm() const { return isImm(0, 255); } bool isS8Imm() const { return isImm(-128, 127); } + bool isU12Imm() const { return isImm(0, 4095); } bool isU16Imm() const { return isImm(0, 65535); } bool 
isS16Imm() const { return isImm(-32768, 32767); } bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } @@ -300,6 +354,7 @@ private: enum RegisterGroup { RegGR, RegFP, + RegV, RegAccess }; struct Register { @@ -318,12 +373,15 @@ private: RegisterKind Kind); bool parseAddress(unsigned &Base, const MCExpr *&Disp, - unsigned &Index, const MCExpr *&Length, + unsigned &Index, bool &IsVector, const MCExpr *&Length, const unsigned *Regs, RegisterKind RegKind); OperandMatchResultTy parseAddress(OperandVector &Operands, - const unsigned *Regs, RegisterKind RegKind, - MemoryKind MemKind); + MemoryKind MemKind, const unsigned *Regs, + RegisterKind RegKind); + + OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal, bool AllowTLS); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); @@ -382,26 +440,45 @@ public: OperandMatchResultTy parseFP128(OperandVector &Operands) { return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); } + OperandMatchResultTy parseVR32(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); + } + OperandMatchResultTy parseVR64(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); + } + OperandMatchResultTy parseVF128(OperandVector &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + OperandMatchResultTy parseVR128(OperandVector &Operands) { + return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); + } OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { - return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); + return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg); } OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { - return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); + return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg); } OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) 
{ - return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); + return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg); } OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { - return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); + return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { + return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg); } OperandMatchResultTy parseAccessReg(OperandVector &Operands); - OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, - int64_t MaxVal); OperandMatchResultTy parsePCRel16(OperandVector &Operands) { - return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false); } OperandMatchResultTy parsePCRel32(OperandVector &Operands) { - return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false); + } + OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true); + } + OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) { + return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); } }; } // end anonymous namespace @@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { Reg.Group = RegGR; else if (Prefix == 'f' && Reg.Num < 16) Reg.Group = RegFP; + else if (Prefix == 'v' && Reg.Num < 32) + Reg.Group = RegV; else if (Prefix == 'a' && Reg.Num < 16) Reg.Group = RegAccess; else @@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group, // Regs maps asm register numbers to LLVM register numbers and RegKind // says what kind of address register we're using (ADDR32Reg or ADDR64Reg). 
bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, - unsigned &Index, const MCExpr *&Length, - const unsigned *Regs, + unsigned &Index, bool &IsVector, + const MCExpr *&Length, const unsigned *Regs, RegisterKind RegKind) { // Parse the displacement, which must always be present. if (getParser().parseExpression(Disp)) @@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // Parse the optional base and index. Index = 0; Base = 0; + IsVector = false; Length = nullptr; if (getLexer().is(AsmToken::LParen)) { Parser.Lex(); @@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, if (getLexer().is(AsmToken::Percent)) { // Parse the first register and decide whether it's a base or an index. Register Reg; - if (parseRegister(Reg, RegGR, Regs, RegKind)) + if (parseRegister(Reg)) return true; - if (getLexer().is(AsmToken::Comma)) - Index = Reg.Num; - else - Base = Reg.Num; + if (Reg.Group == RegV) { + // A vector index register. The base register is optional. + IsVector = true; + Index = SystemZMC::VR128Regs[Reg.Num]; + } else if (Reg.Group == RegGR) { + if (Reg.Num == 0) + return Error(Reg.StartLoc, "%r0 used in an address"); + // If there are two registers, the first one is the index and the + // second is the base. + if (getLexer().is(AsmToken::Comma)) + Index = Regs[Reg.Num]; + else + Base = Regs[Reg.Num]; + } else + return Error(Reg.StartLoc, "invalid address register"); } else { // Parse the length. if (getParser().parseExpression(Length)) @@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // Parse a memory operand and add it to Operands. The other arguments // are as above. 
SystemZAsmParser::OperandMatchResultTy -SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs, - RegisterKind RegKind, MemoryKind MemKind) { +SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, + const unsigned *Regs, RegisterKind RegKind) { SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base, Index; + bool IsVector; const MCExpr *Disp; const MCExpr *Length; - if (parseAddress(Base, Disp, Index, Length, Regs, RegKind)) + if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind)) return MatchOperand_ParseFail; - if (Index && MemKind != BDXMem) - { - Error(StartLoc, "invalid use of indexed addressing"); - return MatchOperand_ParseFail; - } + if (IsVector && MemKind != BDVMem) { + Error(StartLoc, "invalid use of vector addressing"); + return MatchOperand_ParseFail; + } - if (Length && MemKind != BDLMem) - { - Error(StartLoc, "invalid use of length addressing"); - return MatchOperand_ParseFail; - } + if (!IsVector && MemKind == BDVMem) { + Error(StartLoc, "vector index required in address"); + return MatchOperand_ParseFail; + } - if (!Length && MemKind == BDLMem) - { - Error(StartLoc, "missing length in address"); - return MatchOperand_ParseFail; - } + if (Index && MemKind != BDXMem && MemKind != BDVMem) { + Error(StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + if (Length && MemKind != BDLMem) { + Error(StartLoc, "invalid use of length addressing"); + return MatchOperand_ParseFail; + } + + if (!Length && MemKind == BDLMem) { + Error(StartLoc, "missing length in address"); + return MatchOperand_ParseFail; + } SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, - Length, StartLoc, EndLoc)); + Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp, + Index, Length, StartLoc, + EndLoc)); return MatchOperand_Success; } @@ -589,6 +689,8 @@ bool 
SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = SystemZMC::GR64Regs[Reg.Num]; else if (Reg.Group == RegFP) RegNo = SystemZMC::FP64Regs[Reg.Num]; + else if (Reg.Group == RegV) + RegNo = SystemZMC::VR128Regs[Reg.Num]; else // FIXME: Access registers aren't modelled as LLVM registers yet. return Error(Reg.StartLoc, "invalid operand for instruction"); @@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, // so we treat any plain expression as an immediate. SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base, Index; + bool IsVector; const MCExpr *Expr, *Length; - if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg)) + if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs, + ADDR64Reg)) return true; SMLoc EndLoc = @@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(OperandVector &Operands) { SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, - int64_t MaxVal) { + int64_t MaxVal, bool AllowTLS) { MCContext &Ctx = getContext(); MCStreamer &Out = getStreamer(); const MCExpr *Expr; @@ -759,16 +863,61 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, Error(StartLoc, "offset out of range"); return MatchOperand_ParseFail; } - MCSymbol *Sym = Ctx.CreateTempSymbol(); + MCSymbol *Sym = Ctx.createTempSymbol(); Out.EmitLabel(Sym); const MCExpr *Base = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); } + // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. 
+ const MCExpr *Sym = nullptr; + if (AllowTLS && getLexer().is(AsmToken::Colon)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + StringRef Name = Parser.getTok().getString(); + if (Name == "tls_gdcall") + Kind = MCSymbolRefExpr::VK_TLSGD; + else if (Name == "tls_ldcall") + Kind = MCSymbolRefExpr::VK_TLSLDM; + else { + Error(Parser.getTok().getLoc(), "unknown TLS tag"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + return MatchOperand_ParseFail; + } + + StringRef Identifier = Parser.getTok().getString(); + Sym = MCSymbolRefExpr::Create(Ctx.getOrCreateSymbol(Identifier), + Kind, Ctx); + Parser.Lex(); + } + SMLoc EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + + if (AllowTLS) + Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym, + StartLoc, EndLoc)); + else + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return MatchOperand_Success; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 41a614d9d151a..336f037bb7330 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen SystemZISelDAGToDAG.cpp SystemZISelLowering.cpp SystemZInstrInfo.cpp + SystemZLDCleanup.cpp SystemZLongBranch.cpp SystemZMachineFunctionInfo.cpp SystemZMCInstLower.cpp @@ -28,6 +29,7 @@ add_llvm_target(SystemZCodeGen SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + 
SystemZTargetTransformInfo.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 23173bfbd91bb..bf67b75d53377 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler { public: SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~SystemZDisassembler() {} + ~SystemZDisassembler() override {} DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -47,74 +47,94 @@ extern "C" void LLVMInitializeSystemZDisassembler() { } static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, - const unsigned *Regs) { - assert(RegNo < 16 && "Invalid register"); + const unsigned *Regs, unsigned Size) { + assert(RegNo < Size && "Invalid register"); RegNo = Regs[RegNo]; if (RegNo == 0) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(RegNo)); + Inst.addOperand(MCOperand::createReg(RegNo)); return MCDisassembler::Success; } static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); } static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); } static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); } static DecodeStatus 
DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); } static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); } static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); } static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); } static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); + return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16); +} + +static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); +} + +static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); +} + +static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); } template<unsigned N> static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { - assert(isUInt<N>(Imm) && "Invalid immediate"); - 
Inst.addOperand(MCOperand::CreateImm(Imm)); + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } template<unsigned N> static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { - assert(isUInt<N>(Imm) && "Invalid immediate"); - Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm))); + if (!isUInt<N>(Imm)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm))); return MCDisassembler::Success; } @@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm, return decodeUImmOperand<4>(Inst, Imm); } +static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<1>(Inst, Imm); +} + +static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<2>(Inst, Imm); +} + +static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<3>(Inst, Imm); +} + static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, const void *Decoder) { return decodeUImmOperand<4>(Inst, Imm); @@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm, return decodeUImmOperand<8>(Inst, Imm); } +static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<12>(Inst, Imm); +} + static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, uint64_t Address, const void *Decoder) { return decodeUImmOperand<16>(Inst, Imm); @@ -168,7 +208,7 @@ template<unsigned N> static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm, uint64_t Address) { assert(isUInt<N>(Imm) && "Invalid PC-relative offset"); - Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm) * 2 + Address)); + 
Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) * 2 + Address)); return MCDisassembler::Success; } @@ -189,8 +229,8 @@ static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field, uint64_t Base = Field >> 12; uint64_t Disp = Field & 0xfff; assert(Base < 16 && "Invalid BDAddr12"); - Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); - Inst.addOperand(MCOperand::CreateImm(Disp)); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); return MCDisassembler::Success; } @@ -199,8 +239,8 @@ static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field, uint64_t Base = Field >> 20; uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff); assert(Base < 16 && "Invalid BDAddr20"); - Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); - Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp))); return MCDisassembler::Success; } @@ -210,9 +250,9 @@ static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field, uint64_t Base = (Field >> 12) & 0xf; uint64_t Disp = Field & 0xfff; assert(Index < 16 && "Invalid BDXAddr12"); - Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); - Inst.addOperand(MCOperand::CreateImm(Disp)); - Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createReg(Index == 0 ? 
0 : Regs[Index])); return MCDisassembler::Success; } @@ -222,9 +262,9 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, uint64_t Base = (Field >> 20) & 0xf; uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12); assert(Index < 16 && "Invalid BDXAddr20"); - Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); - Inst.addOperand(MCOperand::CreateImm(SignExtend64<20>(Disp))); - Inst.addOperand(MCOperand::CreateReg(Index == 0 ? 0 : Regs[Index])); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp))); + Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index])); return MCDisassembler::Success; } @@ -234,9 +274,21 @@ static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field, uint64_t Base = (Field >> 12) & 0xf; uint64_t Disp = Field & 0xfff; assert(Length < 256 && "Invalid BDLAddr12Len8"); - Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); - Inst.addOperand(MCOperand::CreateImm(Disp)); - Inst.addOperand(MCOperand::CreateImm(Length + 1)); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createImm(Length + 1)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Index = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Index < 32 && "Invalid BDVAddr12"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 
0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index])); return MCDisassembler::Success; } @@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); } +static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs); +} + #include "SystemZGenDisassemblerTables.inc" DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index d2ba9b6f54c35..373ddfa7e2577 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -9,7 +9,10 @@ #include "SystemZInstPrinter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -21,13 +24,17 @@ using namespace llvm; void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, unsigned Index, raw_ostream &O) { O << Disp; - if (Base) { + if (Base || Index) { O << '('; - if (Index) - O << '%' << getRegisterName(Index) << ','; - O << '%' << getRegisterName(Base) << ')'; - } else - assert(!Index && "Shouldn't have an index without a base"); + if (Index) { + O << '%' << getRegisterName(Index); + if (Base) + O << ','; + } + if (Base) + O << '%' << getRegisterName(Base); + O << ')'; + } } void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { @@ -42,7 +49,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { } void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef 
Annot, + const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } @@ -51,60 +59,78 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { O << '%' << getRegisterName(RegNo); } -void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { +template<unsigned N> +void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isUInt<4>(Value) && "Invalid u4imm argument"); + assert(isUInt<N>(Value) && "Invalid uimm argument"); O << Value; } -void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { +template<unsigned N> +void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isUInt<6>(Value) && "Invalid u6imm argument"); + assert(isInt<N>(Value) && "Invalid simm argument"); O << Value; } +void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<1>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<2>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<3>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<4>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<6>(MI, OpNum, O); +} + void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isInt<8>(Value) && "Invalid s8imm argument"); - O << Value; + printSImmOperand<8>(MI, OpNum, O); } void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = 
MI->getOperand(OpNum).getImm(); - assert(isUInt<8>(Value) && "Invalid u8imm argument"); - O << Value; + printUImmOperand<8>(MI, OpNum, O); +} + +void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printUImmOperand<12>(MI, OpNum, O); } void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isInt<16>(Value) && "Invalid s16imm argument"); - O << Value; + printSImmOperand<16>(MI, OpNum, O); } void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isUInt<16>(Value) && "Invalid u16imm argument"); - O << Value; + printUImmOperand<16>(MI, OpNum, O); } void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isInt<32>(Value) && "Invalid s32imm argument"); - O << Value; + printSImmOperand<32>(MI, OpNum, O); } void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); - assert(isUInt<32>(Value) && "Invalid u32imm argument"); - O << Value; + printUImmOperand<32>(MI, OpNum, O); } void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, @@ -124,6 +150,29 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum, O << *MO.getExpr(); } +void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + // Output the PC-relative operand. + printPCRelOperand(MI, OpNum, O); + + // Output the TLS marker if present. 
+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) { + const MCOperand &MO = MI->getOperand(OpNum + 1); + const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr()); + switch (refExp.getKind()) { + case MCSymbolRefExpr::VK_TLSGD: + O << ":tls_gdcall:"; + break; + case MCSymbolRefExpr::VK_TLSLDM: + O << ":tls_ldcall:"; + break; + default: + llvm_unreachable("Unexpected symbol kind"); + } + O << refExp.getSymbol().getName(); + } +} + void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, raw_ostream &O) { printOperand(MI->getOperand(OpNum), O); @@ -153,6 +202,13 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum, O << ')'; } +void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), + MI->getOperand(OpNum + 2).getReg(), O); +} + void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O) { static const char *const CondNames[] = { diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index 753903cf06d55..847b6962e6f20 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -39,7 +39,8 @@ public: // Override MCInstPrinter. void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; private: // Print various types of operand. 
@@ -47,15 +48,21 @@ private: void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O); void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) 
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt index 542aaee773583..6f8431db7b11c 100644 --- a/lib/Target/SystemZ/LLVMBuild.txt +++ b/lib/Target/SystemZ/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = SystemZCodeGen parent = SystemZ -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 6e7268de55c16..1c3887ab54560 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { switch (unsigned(Kind)) { case SystemZ::FK_390_PC16DBL: case SystemZ::FK_390_PC32DBL: - case SystemZ::FK_390_PLT16DBL: - case SystemZ::FK_390_PLT32DBL: return (int64_t)Value / 2; + + case SystemZ::FK_390_TLS_CALL: + return 0; } llvm_unreachable("Unknown fixup kind!"); @@ -61,7 +62,7 @@ public: llvm_unreachable("SystemZ does do not have assembler relaxation"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createSystemZObjectWriter(OS, OSABI); } }; @@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel } + { "FK_390_TLS_CALL", 0, 
0, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 27b4bd855b3e4..c9290c1922d32 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -16,7 +16,9 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" using namespace llvm; @@ -32,10 +34,10 @@ public: : MCII(mcii), Ctx(ctx) { } - ~SystemZMCCodeEmitter() {} + ~SystemZMCCodeEmitter() override {} // OVerride MCCodeEmitter. - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + void encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; @@ -70,37 +72,55 @@ private: uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; // Operand OpNum of MI needs a PC-relative fixup of kind Kind at // Offset bytes from the start of MI. Add the fixup to Fixups // and return the in-place addend, which since we're a RELA target - // is always 0. + // is always 0. If AllowTLS is true and optional operand OpNum + 1 + // is present, also emit a TLS call fixup for it. 
uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, - unsigned Kind, int64_t Offset) const; + unsigned Kind, int64_t Offset, + bool AllowTLS) const; uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, false); } uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, false); + } + uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC16DBL, 2, true); + } + uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return getPCRelEncoding(MI, OpNum, Fixups, + SystemZ::FK_390_PC32DBL, 2, true); } }; } // end anonymous namespace MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &MCSTI, MCContext &Ctx) { return new SystemZMCCodeEmitter(MCII, Ctx); } void SystemZMCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, +encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); @@ -178,10 +198,22 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, return (Len << 16) | (Base << 12) | Disp; } +uint64_t SystemZMCCodeEmitter:: +getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) 
const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index)); + return (Index << 16) | (Base << 12) | Disp; +} + uint64_t SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups, - unsigned Kind, int64_t Offset) const { + unsigned Kind, int64_t Offset, + bool AllowTLS) const { const MCOperand &MO = MI.getOperand(OpNum); const MCExpr *Expr; if (MO.isImm()) @@ -197,7 +229,14 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); } } - Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); + Fixups.push_back(MCFixup::create(Offset, Expr, (MCFixupKind)Kind)); + + // Output the fixup for the TLS marker if present. + if (AllowTLS && OpNum + 1 < MI.getNumOperands()) { + const MCOperand &MOTLS = MI.getOperand(OpNum + 1); + Fixups.push_back(MCFixup::create(0, MOTLS.getExpr(), + (MCFixupKind)SystemZ::FK_390_TLS_CALL)); + } return 0; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h index 52a8d1d6600b3..229ab5dc86fb0 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -18,8 +18,7 @@ enum FixupKind { // These correspond directly to R_390_* relocations. 
FK_390_PC16DBL = FirstTargetFixupKind, FK_390_PC32DBL, - FK_390_PLT16DBL, - FK_390_PLT32DBL, + FK_390_TLS_CALL, // Marker LastTargetFixupKind, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index c6a1816588963..ee1af023769e8 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter { public: SystemZObjectWriter(uint8_t OSABI); - virtual ~SystemZObjectWriter(); + ~SystemZObjectWriter() override; protected: // Override MCELFObjectTargetWriter. @@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned Kind) { case FK_Data_8: return ELF::R_390_PC64; case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; - case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL; - case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL; } llvm_unreachable("Unsupported PC-relative address"); } @@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned Kind) { llvm_unreachable("Unsupported absolute address"); } +// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind. +static unsigned getTLSLDOReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDO32; + case FK_Data_8: return ELF::R_390_TLS_LDO64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind. +static unsigned getTLSLDMReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LDM32; + case FK_Data_8: return ELF::R_390_TLS_LDM64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind. 
+static unsigned getTLSGDReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_GD32; + case FK_Data_8: return ELF::R_390_TLS_GD64; + case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL; + } + llvm_unreachable("Unsupported absolute address"); +} + // Return the PLT relocation counterpart of MCFixupKind Kind. static unsigned getPLTReloc(unsigned Kind) { switch (Kind) { @@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); return getTLSLEReloc(Kind); + case MCSymbolRefExpr::VK_INDNTPOFF: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_TLS_IEENT; + llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now"); + + case MCSymbolRefExpr::VK_DTPOFF: + assert(!IsPCRel && "DTPOFF shouldn't be PC-relative"); + return getTLSLDOReloc(Kind); + + case MCSymbolRefExpr::VK_TLSLDM: + assert(!IsPCRel && "TLSLDM shouldn't be PC-relative"); + return getTLSLDMReloc(Kind); + + case MCSymbolRefExpr::VK_TLSGD: + assert(!IsPCRel && "TLSGD shouldn't be PC-relative"); + return getTLSGDReloc(Kind); + case MCSymbolRefExpr::VK_GOT: if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) return ELF::R_390_GOTENT; @@ -108,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 6e82b6d98ae43..8c2075afe505e 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -12,6 +12,7 @@ #include "SystemZMCAsmInfo.h" #include 
"llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" @@ -76,6 +77,39 @@ const unsigned SystemZMC::FP128Regs[16] = { SystemZ::F12Q, SystemZ::F13Q, 0, 0 }; +const unsigned SystemZMC::VR32Regs[32] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, + SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S, + SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S, + SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S, + SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S +}; + +const unsigned SystemZMC::VR64Regs[32] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, + SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D, + SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D, + SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D, + SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D +}; + +const unsigned SystemZMC::VR128Regs[32] = { + SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3, + SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7, + SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11, + SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15, + SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19, + SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23, + SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, + SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31 +}; + unsigned SystemZMC::getFirstReg(unsigned Reg) { static unsigned Map[SystemZ::NUM_TARGET_REGS]; static bool Initialized 
= false; @@ -85,10 +119,13 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { Map[GRH32Regs[I]] = I; Map[GR64Regs[I]] = I; Map[GR128Regs[I]] = I; - Map[FP32Regs[I]] = I; - Map[FP64Regs[I]] = I; Map[FP128Regs[I]] = I; } + for (unsigned I = 0; I < 32; ++I) { + Map[VR32Regs[I]] = I; + Map[VR64Regs[I]] = I; + Map[VR128Regs[I]] = I; + } } assert(Reg < SystemZ::NUM_TARGET_REGS); return Map[Reg]; @@ -168,27 +205,18 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, CM = CodeModel::Small; else if (CM == CodeModel::JITDefault) CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; - X->InitMCCodeGenInfo(RM, CM, OL); + X->initMCCodeGenInfo(RM, CM, OL); return X; } -static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new SystemZInstPrinter(MAI, MII, MRI); } -static MCStreamer * -createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx, - MCAsmBackend &MAB, raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll) { - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); -} - extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCAsmInfo. TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, @@ -221,8 +249,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCInstPrinter. 
TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget, createSystemZMCInstPrinter); - - // Register the MCObjectStreamer; - TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget, - createSystemZMCObjectStreamer); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 5eb6526a5c000..36ea750ec8dc7 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -23,6 +23,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheSystemZTarget; @@ -48,6 +49,9 @@ extern const unsigned GR128Regs[16]; extern const unsigned FP32Regs[16]; extern const unsigned FP64Regs[16]; extern const unsigned FP128Regs[16]; +extern const unsigned VR32Regs[32]; +extern const unsigned VR64Regs[32]; +extern const unsigned VR128Regs[32]; // Return the 0-based number of the first architectural register that // contains the given LLVM register. E.g. R1D -> 1. @@ -67,18 +71,22 @@ inline unsigned getRegAsGR32(unsigned Reg) { inline unsigned getRegAsGRH32(unsigned Reg) { return GRH32Regs[getFirstReg(Reg)]; } + +// Return the given register as a VR128. +inline unsigned getRegAsVR128(unsigned Reg) { + return VR128Regs[getFirstReg(Reg)]; +} } // end namespace SystemZMC MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, MCContext &Ctx); MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers. 
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index c8b95b2b2ca71..cafe2c5948c44 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -68,6 +68,25 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + +// Condition-code mask assignments for vector comparisons (and similar +// operations). +const unsigned CCMASK_VCMP_ALL = CCMASK_0; +const unsigned CCMASK_VCMP_MIXED = CCMASK_1; +const unsigned CCMASK_VCMP_NONE = CCMASK_3; +const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; @@ -75,6 +94,13 @@ const unsigned IPM_CC = 28; const unsigned PFD_READ = 1; const unsigned PFD_WRITE = 2; +// Number of bits in a vector register. +const unsigned VectorBits = 128; + +// Number of bytes in a vector register (and consequently the number of +// bytes in a general permute vector). +const unsigned VectorBytes = VectorBits / 8; + // Return true if Val fits an LLILL operand. 
static inline bool isImmLL(uint64_t Val) { return (Val & ~0x000000000000ffffULL) == 0; @@ -111,6 +137,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); } // end namespace llvm #endif diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index 5f829034902f4..d4d636d3479c5 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -40,6 +40,7 @@ include "SystemZOperands.td" include "SystemZPatterns.td" include "SystemZInstrFormats.td" include "SystemZInstrInfo.td" +include "SystemZInstrVector.td" include "SystemZInstrFP.td" def SystemZInstrInfo : InstrInfo {} diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index f4f3ec7a97332..a0d079fcc3598 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { .addImm(MI->getOperand(5).getImm()); } +static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { + StringRef Name = "__tls_get_offset"; + return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_PLT, + Context); +} + +static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { + StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + return MCSymbolRefExpr::Create(Context.getOrCreateSymbol(Name), + MCSymbolRefExpr::VK_None, + Context); +} + +// MI loads the high part of a vector from memory. Return an instruction +// that uses replicating vector load Opcode to do the same thing. 
+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()); +} + +// MI stores the high part of a vector to memory. Return an instruction +// that uses elemental vector store Opcode to do the same thing. +static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()) + .addImm(0); +} + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { SystemZMCInstLower Lower(MF->getContext(), *this); MCInst LoweredMI; @@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); break; + case SystemZ::TLS_GDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD)); + break; + + case SystemZ::TLS_LDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); + break; + + case SystemZ::GOT: + LoweredMI = MCInstBuilder(SystemZ::LARL) + .addReg(MI->getOperand(0).getReg()) + .addExpr(getGlobalOffsetTable(MF->getContext())); + break; + case SystemZ::IILF64: LoweredMI = MCInstBuilder(SystemZ::IILF) .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) @@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); break; + case SystemZ::VLVGP32: + LoweredMI = MCInstBuilder(SystemZ::VLVGP) + 
.addReg(MI->getOperand(0).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg())) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())); + break; + + case SystemZ::VLR32: + case SystemZ::VLR64: + LoweredMI = MCInstBuilder(SystemZ::VLR) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); + break; + + case SystemZ::VL32: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); + break; + + case SystemZ::VL64: + LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); + break; + + case SystemZ::VST32: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); + break; + + case SystemZ::VST64: + LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG); + break; + + case SystemZ::LFER: + LoweredMI = MCInstBuilder(SystemZ::VLGVF) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) + .addReg(0).addImm(0); + break; + + case SystemZ::LEFR: + LoweredMI = MCInstBuilder(SystemZ::VLVGF) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) + .addReg(MI->getOperand(1).getReg()) + .addReg(0).addImm(0); + break; + #define LOWER_LOW(NAME) \ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break @@ -152,7 +252,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { #undef LOWER_HIGH case SystemZ::Serialize: - if (Subtarget->hasFastSerialization()) + if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization()) LoweredMI = MCInstBuilder(SystemZ::AsmBCR) .addImm(14).addReg(SystemZ::R0D); else @@ -164,7 +264,7 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { Lower.lower(MI, LoweredMI); break; } - EmitToStreamer(OutStreamer, LoweredMI); + EmitToStreamer(*OutStreamer, LoweredMI); } // Convert a SystemZ-specific constant pool modifier into the associated @@ -172,6 +272,9 @@ void 
SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { static MCSymbolRefExpr::VariantKind getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { switch (Modifier) { + case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; + case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; } llvm_unreachable("Invalid SystemCPModifier!"); @@ -185,10 +288,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); - uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ZCPV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); - OutStreamer.EmitValue(Expr, Size); + OutStreamer->EmitValue(Expr, Size); } bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, @@ -219,29 +321,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetELF()) { - auto &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - // Force static initialization. 
extern "C" void LLVMInitializeSystemZAsmPrinter() { RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index 64672792a8765..7f6e823729dc7 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -22,14 +22,9 @@ class Module; class raw_ostream; class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { -private: - const SystemZSubtarget *Subtarget; - public: - SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<SystemZSubtarget>(); - } + SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} // Override AsmPrinter. const char *getPassName() const override { @@ -43,7 +38,6 @@ public: bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; - void EmitEndOfAsmFile(Module &M) override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h index 71605ac112685..bff0706618aa8 100644 --- a/lib/Target/SystemZ/SystemZCallingConv.h +++ b/lib/Target/SystemZ/SystemZCallingConv.h @@ -10,6 +10,9 @@ #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" + namespace llvm { namespace SystemZ { const unsigned NumArgGPRs = 5; @@ -18,6 +21,64 @@ namespace SystemZ { const unsigned NumArgFPRs = 4; extern const unsigned ArgFPRs[NumArgFPRs]; } // end namespace SystemZ + +class SystemZCCState : public CCState { +private: + /// Records whether the value was a fixed argument. + /// See ISD::OutputArg::IsFixed. + SmallVector<bool, 4> ArgIsFixed; + + /// Records whether the value was widened from a short vector type. 
+ SmallVector<bool, 4> ArgIsShortVector; + + // Check whether ArgVT is a short vector type. + bool IsShortVectorType(EVT ArgVT) { + return ArgVT.isVector() && ArgVT.getStoreSize() <= 8; + } + +public: + SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CCState(CC, isVarArg, MF, locs, C) {} + + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + // Formal arguments are always fixed. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsFixed.push_back(true); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Ins.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT)); + + CCState::AnalyzeFormalArguments(Ins, Fn); + } + + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Record whether the call operand was a fixed argument. + ArgIsFixed.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsFixed.push_back(Outs[i].IsFixed); + // Record whether the call operand was a short vector. + ArgIsShortVector.clear(); + for (unsigned i = 0; i < Outs.size(); ++i) + ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT)); + + CCState::AnalyzeCallOperands(Outs, Fn); + } + + // This version of AnalyzeCallOperands in the base class is not usable + // since we must provide a means of accessing ISD::OutputArg::IsFixed. 
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) = delete; + + bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; } + bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; } +}; + } // end namespace llvm #endif diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td index fb0d1d8a3fe7e..be8f00b57adb5 100644 --- a/lib/Target/SystemZ/SystemZCallingConv.td +++ b/lib/Target/SystemZ/SystemZCallingConv.td @@ -12,6 +12,20 @@ class CCIfExtend<CCAction A> : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("static_cast<const SystemZSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", F), + A>; + +// Match if this specific argument is a fixed (i.e. named) argument. +class CCIfFixed<CCAction A> + : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>; + +// Match if this specific argument was widened from a short vector type. +class CCIfShortVector<CCAction A> + : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>; + + //===----------------------------------------------------------------------===// // z/Linux return value calling convention //===----------------------------------------------------------------------===// @@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[ // doesn't care about the ABI. All floating-point argument registers // are call-clobbered, so we can use all of them here. CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, - CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>> + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // Similarly for vectors, with V24 being the ABI-compliant choice. + // Sub-128 vectors are returned in the same way, but they're widened + // to one of these types during type legalization. 
+ CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> // ABI-compliant code returns long double by reference, but that conversion // is left to higher-level code. Perhaps we could add an f128 definition @@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors + // are passed in the same way, but they're widened to one of these types + // during type legalization. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfFixed<CCAssignToReg<[V24, V26, V28, V30, + V25, V27, V29, V31]>>>>, + + // However, sub-128 vectors which need to go on the stack occupy just a + // single 8-byte-aligned 8-byte stack slot. Pass as i64. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfShortVector<CCBitConvertToType<i64>>>>, + + // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots. + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 8>>>, + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index 19cec219e2d1d..44ea1d25f08e3 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV, unsigned SystemZConstantPoolValue::getRelocationInfo() const { switch (Modifier) { + case SystemZCP::TLSGD: + case SystemZCP::TLSLDM: + case SystemZCP::DTPOFF: + // May require a dynamic relocation. 
+ return 2; case SystemZCP::NTPOFF: // May require a relocation, but the relocations are always resolved // by the static linker. diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h index 0bd8c205ea4d0..e5f1bb18581ba 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -19,13 +19,17 @@ class GlobalValue; namespace SystemZCP { enum SystemZCPModifier { + TLSGD, + TLSLDM, + DTPOFF, NTPOFF }; } // end namespace SystemZCP /// A SystemZ-specific constant pool value. At present, the only -/// defined constant pool values are offsets of thread-local variables -/// (written x@NTPOFF). +/// defined constant pool values are module IDs or offsets of +/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, +/// or x@NTPOFF). class SystemZConstantPoolValue : public MachineConstantPoolValue { const GlobalValue *GV; SystemZCP::SystemZCPModifier Modifier; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index ce99ee5bc4127..16f9adc79f176 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -47,7 +47,7 @@ struct Reference { return *this; } - LLVM_EXPLICIT operator bool() const { return Def || Use; } + explicit operator bool() const { return Def || Use; } // True if the register is defined or used in some form, either directly or // via a sub- or super-register. 
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index eff4ae3baf3f5..a636b35635ce3 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -309,8 +309,9 @@ static void emitIncrement(MachineBasicBlock &MBB, } } -void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void SystemZFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFFrame = MF.getFrameInfo(); auto *ZII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index cefa56fd74e5b..60bad894ee44d 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -40,7 +40,7 @@ public: override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 5f84624c38ea7..63992936813d7 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -127,12 +127,11 @@ struct RxSBGOperands { }; class SystemZDAGToDAGISel : public SelectionDAGISel { - const SystemZTargetLowering &Lowering; - const SystemZSubtarget &Subtarget; + const SystemZSubtarget *Subtarget; // Used by SystemZOperands.td to create integer constants. 
inline SDValue getImm(const SDNode *Node, uint64_t Imm) const { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); } const SystemZTargetMachine &getTargetMachine() const { @@ -140,7 +139,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { } const SystemZInstrInfo *getInstrInfo() const { - return getTargetMachine().getSubtargetImpl()->getInstrInfo(); + return Subtarget->getInstrInfo(); } // Try to fold more of the base or index of AM into AM, where IsBase @@ -256,6 +255,13 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { Addr, Base, Disp, Index); } + // Try to match Addr as an address with a base, 12-bit displacement + // and index, where the index is element Elem of a vector. + // Return true on success, storing the base, displacement and vector + // in Base, Disp and Index respectively. + bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, + SDValue &Disp, SDValue &Index) const; + // Check whether (or Op (and X InsertMask)) is effectively an insertion // of X into bits InsertMask of some Y != Op. Return true if so and // set Op to that Y. @@ -293,6 +299,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + // Try to use gather instruction Opcode to implement vector insertion N. + SDNode *tryGather(SDNode *N, unsigned Opcode); + + // Try to use scatter instruction Opcode to implement store Store. + SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode); + // Return true if Load and Store are loads and stores of the same size // and are guaranteed not to overlap. Such operations can be implemented // using block (SS-format) instructions. 
@@ -315,9 +327,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel), - Lowering(*TM.getSubtargetImpl()->getTargetLowering()), - Subtarget(*TM.getSubtargetImpl()) {} + : SelectionDAGISel(TM, OptLevel) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget<SystemZSubtarget>(); + return SelectionDAGISel::runOnMachineFunction(MF); + } // Override MachineFunctionPass. const char *getPassName() const override { @@ -326,7 +341,7 @@ public: // Override SelectionDAGISel. SDNode *Select(SDNode *Node) override; - bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; // Include the pieces autogenerated from the target description. @@ -594,7 +609,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, } // Lower the displacement to a TargetConstant. - Disp = CurDAG->getTargetConstant(AM.Disp, VT); + Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT); } void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, @@ -643,6 +658,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, return true; } +bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, + SDValue &Base, + SDValue &Disp, + SDValue &Index) const { + SDValue Regs[2]; + if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) && + Regs[0].getNode() && Regs[1].getNode()) { + for (unsigned int I = 0; I < 2; ++I) { + Base = Regs[I]; + Index = Regs[1 - I]; + // We can't tell here whether the index vector has the right type + // for the access; the caller needs to do that instead. 
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) + Index = Index.getOperand(0); + if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Index.getOperand(1) == Elem) { + Index = Index.getOperand(0); + return true; + } + } + } + return false; +} + bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const { // We're only interested in cases where the insertion is into some operand @@ -862,6 +901,7 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const { } SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + SDLoc DL(N); EVT VT = N->getValueType(0); RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); unsigned Count = 0; @@ -887,7 +927,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { // Force the new mask into the DAG, since it may include known-one bits. auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode()); if (MaskN->getZExtValue() != RISBG.Mask) { - SDValue NewMask = CurDAG->getConstant(RISBG.Mask, VT); + SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT); N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); return SelectCode(N); } @@ -896,22 +936,25 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. 
+ if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; EVT OpcodeVT = MVT::i64; - if (VT == MVT::i32 && Subtarget.hasHighWord()) { + if (VT == MVT::i32 && Subtarget->hasHighWord()) { Opcode = SystemZ::RISBMux; OpcodeVT = MVT::i32; RISBG.Start &= 31; RISBG.End &= 31; } SDValue Ops[5] = { - getUNDEF(SDLoc(N), OpcodeVT), - convertTo(SDLoc(N), OpcodeVT, RISBG.Input), - CurDAG->getTargetConstant(RISBG.Start, MVT::i32), - CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32), - CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32) + getUNDEF(DL, OpcodeVT), + convertTo(DL, OpcodeVT, RISBG.Input), + CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32), + CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32) }; - N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops); - return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); + N = CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); } SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { @@ -943,19 +986,24 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. - if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { Opcode = SystemZ::RISBG; - + // Prefer RISBGN if available, since it does not clobber CC. 
+ if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + + SDLoc DL(N); EVT VT = N->getValueType(0); SDValue Ops[5] = { - convertTo(SDLoc(N), MVT::i64, Op0), - convertTo(SDLoc(N), MVT::i64, RxSBG[I].Input), - CurDAG->getTargetConstant(RxSBG[I].Start, MVT::i32), - CurDAG->getTargetConstant(RxSBG[I].End, MVT::i32), - CurDAG->getTargetConstant(RxSBG[I].Rotate, MVT::i32) + convertTo(DL, MVT::i64, Op0), + convertTo(DL, MVT::i64, RxSBG[I].Input), + CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32), + CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32) }; - N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); - return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); + N = CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); } SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, @@ -963,16 +1011,81 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, uint64_t LowerVal) { EVT VT = Node->getValueType(0); SDLoc DL(Node); - SDValue Upper = CurDAG->getConstant(UpperVal, VT); + SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT); if (Op0.getNode()) Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); Upper = SDValue(Select(Upper.getNode()), 0); - SDValue Lower = CurDAG->getConstant(LowerVal, VT); + SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); return Or.getNode(); } +SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { + SDValue ElemV = N->getOperand(2); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return 0; + + unsigned Elem = ElemN->getZExtValue(); + EVT VT = N->getValueType(0); + if (Elem >= VT.getVectorNumElements()) + return 0; + + auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1)); + if (!Load || !Load->hasOneUse()) + return 0; + if 
(Load->getMemoryVT().getSizeInBits() != + Load->getValueType(0).getSizeInBits()) + return 0; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return 0; + + SDLoc DL(Load); + SDValue Ops[] = { + N->getOperand(0), Base, Disp, Index, + CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain() + }; + SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); + ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); + return Res; +} + +SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { + SDValue Value = Store->getValue(); + if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return 0; + if (Store->getMemoryVT().getSizeInBits() != + Value.getValueType().getSizeInBits()) + return 0; + + SDValue ElemV = Value.getOperand(1); + auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); + if (!ElemN) + return 0; + + SDValue Vec = Value.getOperand(0); + EVT VT = Vec.getValueType(); + unsigned Elem = ElemN->getZExtValue(); + if (Elem >= VT.getVectorNumElements()) + return 0; + + SDValue Base, Disp, Index; + if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || + Index.getValueType() != VT.changeVectorElementTypeToInteger()) + return 0; + + SDLoc DL(Store); + SDValue Ops[] = { + Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32), + Store->getChain() + }; + return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); +} + bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const { // Check that the two memory operands have the same size. @@ -1102,13 +1215,33 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { uint64_t ConstCCMask = cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); // Invert the condition. 
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, + CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node), CCMask.getValueType()); SDValue Op4 = Node->getOperand(4); Node = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); } break; } + + case ISD::INSERT_VECTOR_ELT: { + EVT VT = Node->getValueType(0); + unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); + if (ElemBitSize == 32) + ResNode = tryGather(Node, SystemZ::VGEF); + else if (ElemBitSize == 64) + ResNode = tryGather(Node, SystemZ::VGEG); + break; + } + + case ISD::STORE: { + auto *Store = cast<StoreSDNode>(Node); + unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits(); + if (ElemBitSize == 32) + ResNode = tryScatter(Store, SystemZ::VSCEF); + else if (ElemBitSize == 64) + ResNode = tryScatter(Store, SystemZ::VSCEG); + break; + } } // Select the default instruction @@ -1127,18 +1260,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { bool SystemZDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) { - assert(ConstraintCode == 'm' && "Unexpected constraint code"); - // Accept addresses with short displacements, which are compatible - // with Q, R, S and T. But keep the index operand for future expansion. - SDValue Base, Disp, Index; - if (!selectBDXAddr(SystemZAddressingMode::FormBD, - SystemZAddressingMode::Disp12Only, - Op, Base, Disp, Index)) - return true; - OutOps.push_back(Base); - OutOps.push_back(Disp); - OutOps.push_back(Index); - return false; + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + case InlineAsm::Constraint_R: + case InlineAsm::Constraint_S: + case InlineAsm::Constraint_T: + // Accept addresses with short displacements, which are compatible + // with Q, R, S and T. 
But keep the index operand for future expansion. + SDValue Base, Disp, Index; + if (selectBDXAddr(SystemZAddressingMode::FormBD, + SystemZAddressingMode::Disp12Only, + Op, Base, Disp, Index)) { + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; + } + break; + } + return true; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index f7ac1ca299109..24b5a41d7f675 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" #include <cctype> using namespace llvm; @@ -80,9 +81,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) - : TargetLowering(tm), - Subtarget(tm.getSubtarget<SystemZSubtarget>()) { +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, + const SystemZSubtarget &STI) + : TargetLowering(tm), Subtarget(STI) { MVT PtrVT = getPointerTy(); // Set up the register classes. 
@@ -90,13 +91,27 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); else addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); - addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); - addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); - addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); + if (Subtarget.hasVector()) { + addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); + } else { + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + } addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + if (Subtarget.hasVector()) { + addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); + addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); + } + // Compute derived properties from the register classes - computeRegisterProperties(); + computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. setExceptionPointerRegister(SystemZ::R6D); @@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Instructions are strings of 2-byte aligned 2-byte values. setMinFunctionAlignment(2); @@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // available, or if the operand is constant. 
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + // No special instructions for these. - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // Handle prefetches with PFD or PFDRL. setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + for (MVT VT : MVT::vector_valuetypes()) { + // Assume by default that all vector operations need to be expanded. + for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) + if (getOperationAction(Opcode, VT) == Legal) + setOperationAction(Opcode, VT, Expand); + + // Likewise all truncating stores and extending loads. + for (MVT InnerVT : MVT::vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } + + if (isTypeLegal(VT)) { + // These operations are legal for anything that can be stored in a + // vector register, even if there is no native support for the format + // as such. In particular, we can do these for v4f32 even though there + // are no specific instructions for that format. + setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + + // Likewise, except that we need to replace the nodes with something + // more specific. 
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + } + + // Handle integer vector types. + for (MVT VT : MVT::integer_vector_valuetypes()) { + if (isTypeLegal(VT)) { + // These operations have direct equivalents. + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + if (VT != MVT::v2i64) + setOperationAction(ISD::MUL, VT, Legal); + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Legal); + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); + + // Convert a GPR scalar to a vector by inserting it into element 0. + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + + // Use a series of unpacks for extensions. + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); + + // Detect shifts by a scalar amount and convert them into + // V*_BY_SCALAR. + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + + // At present ROTL isn't matched by DAGCombiner. ROTR should be + // converted into ROTL. + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands + // and inverting the result as necessary. + setOperationAction(ISD::SETCC, VT, Custom); + } + } + + if (Subtarget.hasVector()) { + // There should be no need to check for float types other than v2f64 + // since <2 x f32> isn't a legal type. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + } + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) } } + // Handle floating-point vector types. + if (Subtarget.hasVector()) { + // Scalar-to-vector conversion is just a subreg. + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + + // Some insertions and extractions can be done directly but others + // need to go via integers. + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); + + // These operations have direct equivalents. + setOperationAction(ISD::FADD, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Legal); + setOperationAction(ISD::FSUB, MVT::v2f64, Legal); + setOperationAction(ISD::FMUL, MVT::v2f64, Legal); + setOperationAction(ISD::FMA, MVT::v2f64, Legal); + setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FABS, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + } + // We have fused multiply-addition for f32 and f64 but not f128. 
setOperationAction(ISD::FMA, MVT::f32, Legal); setOperationAction(ISD::FMA, MVT::f64, Legal); @@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // We have 64-bit FPR<->GPR moves, but need special handling for // 32-bit forms. - setOperationAction(ISD::BITCAST, MVT::i32, Custom); - setOperationAction(ISD::BITCAST, MVT::f32, Custom); + if (!Subtarget.hasVector()) { + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + } // VASTART and VACOPY need to deal with the SystemZ-specific varargs // structure, but VAEND is a no-op. @@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::FP_ROUND); + + // Handle intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; @@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. 
+ return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -499,8 +652,10 @@ parseRegisterNumber(const std::string &Constraint, return std::make_pair(0U, nullptr); } -std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +SystemZTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, const std::string &Constraint, + MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -557,7 +712,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { SystemZMC::FP64Regs); } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } void SystemZTargetLowering:: @@ -570,35 +725,35 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast<ConstantSDNode>(Op)) if (isUInt<8>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast<ConstantSDNode>(Op)) if (isUInt<12>(C->getZExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast<ConstantSDNode>(Op)) if (isInt<16>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast<ConstantSDNode>(Op)) if 
(isInt<20>(C->getSExtValue())) - Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff if (auto *C = dyn_cast<ConstantSDNode>(Op)) if (C->getZExtValue() == 0x7fffffff) - Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } @@ -623,6 +778,24 @@ bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { return true; } +// We do not yet support 128-bit single-element vector types. If the user +// attempts to use such types as function argument or return type, prefer +// to error out instead of emitting code violating the ABI. +static void VerifyVectorType(MVT VT, EVT ArgVT) { + if (ArgVT.isVector() && !VT.isVector()) + report_fatal_error("Unsupported vector argument or return type"); +} + +static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) { + for (unsigned i = 0; i < Ins.size(); ++i) + VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); +} + +static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) + VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); +} + // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. @@ -643,7 +816,15 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, else if (VA.getLocInfo() == CCValAssign::Indirect) Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, MachinePointerInfo(), false, false, false, 0); - else + else if (VA.getLocInfo() == CCValAssign::BCvt) { + // If this is a short vector argument loaded from the stack, + // extend from i64 to full vector size and then bitcast. 
+ assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, + Value, DAG.getUNDEF(MVT::i64)); + Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); + } else assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); return Value; } @@ -660,6 +841,14 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL, return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::AExt: return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::BCvt: + // If this is a short vector argument to be stored to the stack, + // bitcast to v2i64 and then extract first element. + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT().isVector()); + Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, + DAG.getConstant(0, DL, MVT::i32)); case CCValAssign::Full: return Value; default: @@ -676,13 +865,17 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = - MF.getInfo<SystemZMachineFunctionInfo>(); - auto *TFL = static_cast<const SystemZFrameLowering *>( - DAG.getSubtarget().getFrameLowering()); + MF.getInfo<SystemZMachineFunctionInfo>(); + auto *TFL = + static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering()); + + // Detect unsupported vector argument types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Ins); // Assign locations to all of the incoming arguments. 
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; @@ -714,6 +907,14 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, NumFixedFPRs += 1; RC = &SystemZ::FP64BitRegClass; break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + RC = &SystemZ::VR128BitRegClass; + break; } unsigned VReg = MRI.createVirtualRegister(RC); @@ -732,7 +933,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, EVT PtrVT = getPointerTy(); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -818,9 +1020,15 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(); + // Detect unsupported vector argument and return types. + if (Subtarget.hasVector()) { + VerifyVectorTypes(Outs); + VerifyVectorTypes(Ins); + } + // Analyze the operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -833,7 +1041,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the start of the call. 
if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true), + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, DL, PtrVT, true), DL); // Copy argument values to their designated locations. @@ -869,7 +1078,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, @@ -917,9 +1126,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, RegsToPass[I].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -936,8 +1144,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, PtrVT, true), - DAG.getConstant(0, PtrVT, true), + DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); @@ -972,6 +1180,10 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + // Detect unsupported vector return types. + if (Subtarget.hasVector()) + VerifyVectorTypes(Outs); + // Assign locations to each returned value. 
SmallVector<CCValAssign, 16> RetLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); @@ -1015,6 +1227,207 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Return true if Op is an intrinsic node without chain that returns the +// CC value as its final argument. Provide the associated SystemZISD +// opcode and the mask of valid CC values if so. 
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpkshs: + case Intrinsic::s390_vpksfs: + case Intrinsic::s390_vpksgs: + Opcode = SystemZISD::PACKS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vpklshs: + case Intrinsic::s390_vpklsfs: + case Intrinsic::s390_vpklsgs: + Opcode = SystemZISD::PACKLS_CC; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vceqbs: + case Intrinsic::s390_vceqhs: + case Intrinsic::s390_vceqfs: + case Intrinsic::s390_vceqgs: + Opcode = SystemZISD::VICMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchbs: + case Intrinsic::s390_vchhs: + case Intrinsic::s390_vchfs: + case Intrinsic::s390_vchgs: + Opcode = SystemZISD::VICMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vchlbs: + case Intrinsic::s390_vchlhs: + case Intrinsic::s390_vchlfs: + case Intrinsic::s390_vchlgs: + Opcode = SystemZISD::VICMPHLS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vtm: + Opcode = SystemZISD::VTM; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfaebs: + case Intrinsic::s390_vfaehs: + case Intrinsic::s390_vfaefs: + Opcode = SystemZISD::VFAE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfaezbs: + case Intrinsic::s390_vfaezhs: + case Intrinsic::s390_vfaezfs: + Opcode = SystemZISD::VFAEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeebs: + case Intrinsic::s390_vfeehs: + case Intrinsic::s390_vfeefs: + Opcode = SystemZISD::VFEE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfeezbs: + case Intrinsic::s390_vfeezhs: + case Intrinsic::s390_vfeezfs: + Opcode = SystemZISD::VFEEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenebs: + case 
Intrinsic::s390_vfenehs: + case Intrinsic::s390_vfenefs: + Opcode = SystemZISD::VFENE_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfenezbs: + case Intrinsic::s390_vfenezhs: + case Intrinsic::s390_vfenezfs: + Opcode = SystemZISD::VFENEZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vistrbs: + case Intrinsic::s390_vistrhs: + case Intrinsic::s390_vistrfs: + Opcode = SystemZISD::VISTR_CC; + CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; + return true; + + case Intrinsic::s390_vstrcbs: + case Intrinsic::s390_vstrchs: + case Intrinsic::s390_vstrcfs: + Opcode = SystemZISD::VSTRC_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrczbs: + case Intrinsic::s390_vstrczhs: + case Intrinsic::s390_vstrczfs: + Opcode = SystemZISD::VSTRCZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vfcedbs: + Opcode = SystemZISD::VFCMPES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchdbs: + Opcode = SystemZISD::VFCMPHS; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vfchedbs: + Opcode = SystemZISD::VFCMPHES; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + case Intrinsic::s390_vftcidb: + Opcode = SystemZISD::VFTCI; + CCValid = SystemZ::CCMASK_VCMP; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. 
+ unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + +// Emit an intrinsic with a glued value instead of its CC result. +static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + for (unsigned I = 1; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + if (Op->getNumValues() == 1) + return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); + assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); + SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); + return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1112,7 +1525,7 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { // If C can be converted to a comparison against zero, adjust the operands // as necessary. 
-static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustZeroCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; @@ -1126,13 +1539,13 @@ static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) { (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; - C.Op1 = DAG.getConstant(0, C.Op1.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. -static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { +static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || @@ -1197,7 +1610,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) { // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) - C.Op1 = DAG.getConstant(Value, MVT::i32); + C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable @@ -1293,7 +1706,7 @@ static unsigned reverseCCMask(unsigned CCMask) { // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. 
-static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { +static void adjustForSubtraction(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { @@ -1302,7 +1715,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) { ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { C.Op0 = SDValue(N, 0); - C.Op1 = DAG.getConstant(0, N->getValueType(0)); + C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } @@ -1358,7 +1771,7 @@ static void adjustForLTGFR(Comparison &C) { // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. -static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { +static void adjustICmpTruncate(SelectionDAG &DAG, SDLoc DL, Comparison &C) { if (C.Op0.getOpcode() == ISD::TRUNCATE && C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && @@ -1370,7 +1783,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { C.Op0 = C.Op0.getOperand(0); - C.Op1 = DAG.getConstant(0, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); } } } @@ -1489,7 +1902,7 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. -static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { +static void adjustForTestUnderMask(SelectionDAG &DAG, SDLoc DL, Comparison &C) { // Check that we have a comparison with a constant. 
auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); if (!ConstOp1) @@ -1529,6 +1942,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. @@ -1565,14 +1980,62 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { if (Mask && Mask->getZExtValue() == MaskVal) C.Op1 = SDValue(Mask, 0); else - C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType()); + C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); C.CCValid = SystemZ::CCMASK_TM; C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. +static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? 
-1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond) { + ISD::CondCode Cond, SDLoc DL) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && + isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1596,11 +2059,11 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; - adjustZeroCmp(DAG, C); - adjustSubwordCmp(DAG, C); - adjustForSubtraction(DAG, C); + adjustZeroCmp(DAG, DL, C); + adjustSubwordCmp(DAG, DL, C); + adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); - adjustICmpTruncate(DAG, C); + adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { @@ -1608,20 +2071,34 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, C.CCMask = reverseCCMask(C.CCMask); } - adjustForTestUnderMask(DAG, C); + adjustForTestUnderMask(DAG, DL, C); return C; } // Emit the comparison instruction described by C. 
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + case ISD::INTRINSIC_WO_CHAIN: + Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, MVT::i32)); + DAG.getConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, MVT::i32)); + DAG.getConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); } @@ -1635,7 +2112,8 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL, Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); - Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64)); + Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } @@ -1667,46 +2145,175 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, if (Conversion.XORValue) Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, MVT::i32)); + DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); if (Conversion.AddValue) Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, MVT::i32)); + DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); // The 
SHR/AND sequence should get optimized to an RISBG. Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, MVT::i32)); + DAG.getConstant(Conversion.Bit, DL, MVT::i32)); if (Conversion.Bit != 31) Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); return Result; } +// Return the SystemISD vector comparison operation for CC, or 0 if it cannot +// be done directly. IsFP is true if CC is for a floating-point rather than +// integer comparison. +static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { + switch (CC) { + case ISD::SETOEQ: + case ISD::SETEQ: + return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE; + + case ISD::SETOGE: + case ISD::SETGE: + return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0); + + case ISD::SETOGT: + case ISD::SETGT: + return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; + + case ISD::SETUGT: + return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL; + + default: + return 0; + } +} + +// Return the SystemZISD vector comparison operation for CC or its inverse, +// or 0 if neither can be done directly. Indicate in Invert whether the +// result is for the inverse of CC. IsFP is true if CC is for a +// floating-point rather than integer comparison. +static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, + bool &Invert) { + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + Invert = false; + return Opcode; + } + + CC = ISD::getSetCCInverse(CC, !IsFP); + if (unsigned Opcode = getVectorComparison(CC, IsFP)) { + Invert = true; + return Opcode; + } + + return 0; +} + +// Return a v2f64 that contains the extended form of elements Start and Start+1 +// of v4f32 value Op. 
+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, + SDValue Op) { + int Mask[] = { Start, -1, Start + 1, -1 }; + Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); + return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); +} + +// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, +// producing a result of type VT. +static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, + EVT VT, SDValue CmpOp0, SDValue CmpOp1) { + // There is no hardware support for v4f32, so extend the vector into + // two v2f64s and compare those. + if (CmpOp0.getValueType() == MVT::v4f32) { + SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); + SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); + SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); + SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); + SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); + SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); + return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); + } + return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); +} + +// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing +// an integer mask of type VT. +static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, + ISD::CondCode CC, SDValue CmpOp0, + SDValue CmpOp1) { + bool IsFP = CmpOp0.getValueType().isFloatingPoint(); + bool Invert = false; + SDValue Cmp; + switch (CC) { + // Handle tests for order using (or (ogt y x) (oge x y)). + case ISD::SETUO: + Invert = true; + case ISD::SETO: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); + break; + } + + // Handle <> tests using (or (ogt y x) (ogt x y)). 
+ case ISD::SETUEQ: + Invert = true; + case ISD::SETONE: { + assert(IsFP && "Unexpected integer comparison"); + SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); + SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); + Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); + break; + } + + // Otherwise a single comparison is enough. It doesn't really + // matter whether we try the inversion or the swap first, since + // there are no cases where both work. + default: + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); + else { + CC = ISD::getSetCCSwappedOperands(CC); + if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) + Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); + else + llvm_unreachable("Unhandled comparison"); + } + break; + } + if (Invert) { + SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(65535, DL, MVT::i32)); + Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); + } + return Cmp; +} + SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(0); SDValue CmpOp1 = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDLoc DL(Op); + EVT VT = Op.getValueType(); + if (VT.isVector()) + return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - Comparison C(getCmp(DAG, 
CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), - DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); + Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -1727,7 +2334,7 @@ static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), - DAG.getConstant(0, Op.getValueType()), Op); + DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } @@ -1740,7 +2347,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDLoc DL(Op); - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). @@ -1775,18 +2382,14 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, if (!is32Bit(VT)) Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); // Sign-extend from the low bit. 
- SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32); + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); } } - SmallVector<SDValue, 5> Ops; - Ops.push_back(TrueOp); - Ops.push_back(FalseOp); - Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32)); - Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32)); - Ops.push_back(Glue); + SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), + DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); @@ -1826,11 +2429,58 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, - DAG.getConstant(Offset, PtrVT)); + DAG.getConstant(Offset, DL, PtrVT)); return Result; } +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. 
+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); @@ -1838,33 +2488,94 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, EVT PtrVT = getPointerTy(); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, DL, MVT::i32)); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. 
SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, - DAG.getConstant(32, PtrVT)); + DAG.getConstant(32, DL, PtrVT)); SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - // Get the offset of GA from the thread pointer. - SystemZConstantPoolValue *CPV = - SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); - // Force the offset into the constant pool and load it from there. - SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); - SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - CPAddr, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); + + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. 
+ SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + DTPOffset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getGOT(), + false, false, false, 0); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + break; + } + } // Add the base and offset together. return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); @@ -1916,6 +2627,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, EVT InVT = In.getValueType(); EVT ResVT = Op.getValueType(); + // Convert loads directly. This is normally done by DAGCombiner, + // but we need this case for bitcasts that are created during lowering + // and which are then lowered themselves. 
+ if (auto *LoadN = dyn_cast<LoadSDNode>(In)) + return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), + LoadN->getMemOperand()); + if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; if (Subtarget.hasHighWord()) { @@ -1926,22 +2644,22 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); - return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, + return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, DL, MVT::f32, Out64); } if (InVT == MVT::f32 && ResVT == MVT::i32) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); - SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, + SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, MVT::f64, SDValue(U64, 0), In); SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); if (Subtarget.hasHighWord()) return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); @@ -1962,8 +2680,8 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, // The initial values of each field. 
const unsigned NumFields = 4; SDValue Fields[NumFields] = { - DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), - DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; @@ -1975,7 +2693,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset), false, false, 0); @@ -1993,8 +2711,9 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); SDLoc DL(Op); - return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2049,7 +2768,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) - SDValue C63 = DAG.getConstant(63, MVT::i64); + SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); @@ -2187,6 +2906,81 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + Op = Op.getOperand(0); + + // Handle vector types via VPOPCT. 
+ if (VT.isVector()) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); + switch (VT.getVectorElementType().getSizeInBits()) { + case 8: + break; + case 16: { + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + SDValue Shift = DAG.getConstant(8, DL, MVT::i32); + SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); + break; + } + case 32: { + SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + case 64: { + SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(0, DL, MVT::i32)); + Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); + Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); + break; + } + default: + llvm_unreachable("Unexpected type"); + } + return Op; + } + + // Get the known-zero mask for the operand. + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, DL, VT); + + // Skip known-zero high parts of the operand. + int64_t OrigBitSize = VT.getSizeInBits(); + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. 
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(BitSize - 8, DL, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2233,23 +3027,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; - Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. 
// ATOMIC_SWAPW uses RISBG to rotate the field left, but all other @@ -2258,23 +3052,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, // bits must be set, while for other opcodes they should be left clear. if (Opcode != SystemZISD::ATOMIC_SWAPW) Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, - DAG.getConstant(32 - BitSize, WideVT)); + DAG.getConstant(32 - BitSize, DL, WideVT)); if (Opcode == SystemZISD::ATOMIC_LOADW_AND || Opcode == SystemZISD::ATOMIC_LOADW_NAND) Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, - DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); // Construct the ATOMIC_LOADW_* node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, - DAG.getConstant(BitSize, WideVT) }; + DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower // bits of a GR32, then truncate it. SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, - DAG.getConstant(BitSize, WideVT)); + DAG.getConstant(BitSize, DL, WideVT)); SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; @@ -2300,10 +3094,10 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, // available or the negative value is in the range of A(G)FHI. int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) - NegSrc2 = DAG.getConstant(Value, MemVT); + NegSrc2 = DAG.getConstant(Value, DL, MemVT); } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. 
- NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); if (NegSrc2.getNode()) @@ -2342,23 +3136,23 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, PtrVT)); + DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, PtrVT)); + DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, WideVT), BitShift); + DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, - NegBitShift, DAG.getConstant(BitSize, WideVT) }; + NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); return AtomicOp; @@ -2387,19 +3181,1084 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, // Just preserve the chain. return Op.getOperand(0); + SDLoc DL(Op); bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? 
SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); SDValue Ops[] = { Op.getOperand(0), - DAG.getConstant(Code, MVT::i32), + DAG.getConstant(Code, DL, MVT::i32), Op.getOperand(1) }; - return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. +static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDLoc DL(After); + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCC(Op, Opcode, CCValid)) { + SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + if (Op->getNumValues() == 1) + return CC; + assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), + Glued, CC); + } + + unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_vpdi: + return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), 
Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vperm: + return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::s390_vuphb: + case Intrinsic::s390_vuphh: + case Intrinsic::s390_vuphf: + return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplhb: + case Intrinsic::s390_vuplhh: + case Intrinsic::s390_vuplhf: + return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vuplb: + case Intrinsic::s390_vuplhw: + case Intrinsic::s390_vuplf: + return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vupllb: + case Intrinsic::s390_vupllh: + case Intrinsic::s390_vupllf: + return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); + + case Intrinsic::s390_vsumb: + case Intrinsic::s390_vsumh: + case Intrinsic::s390_vsumgh: + case Intrinsic::s390_vsumgf: + case Intrinsic::s390_vsumqf: + case Intrinsic::s390_vsumqg: + return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + + return SDValue(); +} + +namespace { +// Says that SystemZISD operation Opcode can be used to perform the equivalent +// of a VPERM with permute vector Bytes. If Opcode takes three operands, +// Operand is the constant third operand, otherwise it is the number of +// bytes in each element of the result. 
+struct Permute { + unsigned Opcode; + unsigned Operand; + unsigned char Bytes[SystemZ::VectorBytes]; +}; +} + +static const Permute PermuteForms[] = { + // VMRHG + { SystemZISD::MERGE_HIGH, 8, + { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VMRHF + { SystemZISD::MERGE_HIGH, 4, + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + // VMRHH + { SystemZISD::MERGE_HIGH, 2, + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + // VMRHB + { SystemZISD::MERGE_HIGH, 1, + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + // VMRLG + { SystemZISD::MERGE_LOW, 8, + { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, + // VMRLF + { SystemZISD::MERGE_LOW, 4, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + // VMRLH + { SystemZISD::MERGE_LOW, 2, + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + // VMRLB + { SystemZISD::MERGE_LOW, 1, + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + // VPKG + { SystemZISD::PACK, 4, + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, + // VPKF + { SystemZISD::PACK, 2, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + // VPKH + { SystemZISD::PACK, 1, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + // VPDI V1, V2, 4 (low half of V1, high half of V2) + { SystemZISD::PERMUTE_DWORDS, 4, + { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, + // VPDI V1, V2, 1 (high half of V1, low half of V2) + { SystemZISD::PERMUTE_DWORDS, 1, + { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } +}; + +// Called after matching a vector shuffle against a particular pattern. +// Both the original shuffle and the pattern have two vector operands. +// OpNos[0] is the operand of the original shuffle that should be used for +// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. 
+// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and +// set OpNo0 and OpNo1 to the shuffle operands that should actually be used +// for operands 0 and 1 of the pattern. +static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { + if (OpNos[0] < 0) { + if (OpNos[1] < 0) + return false; + OpNo0 = OpNo1 = OpNos[1]; + } else if (OpNos[1] < 0) { + OpNo0 = OpNo1 = OpNos[0]; + } else { + OpNo0 = OpNos[0]; + OpNo1 = OpNos[1]; + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if the VPERM can be implemented using P. +// When returning true set OpNo0 to the VPERM operand that should be +// used for operand 0 of P and likewise OpNo1 for operand 1 of P. +// +// For example, if swapping the VPERM operands allows P to match, OpNo0 +// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one +// operand, but rewriting it to use two duplicated operands allows it to +// match P, then OpNo0 and OpNo1 will be the same. +static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, + unsigned &OpNo0, unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { + int Elt = Bytes[I]; + if (Elt >= 0) { + // Make sure that the two permute vectors use the same suboperand + // byte number. Only the operand numbers (the high bits) are + // allowed to differ. + if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) + return false; + int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; + int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// As above, but search for a matching permute. 
+static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, + unsigned &OpNo0, unsigned &OpNo1) { + for (auto &P : PermuteForms) + if (matchPermute(Bytes, P, OpNo0, OpNo1)) + return &P; + return nullptr; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. This permute is an operand of an outer permute. +// See whether redistributing the -1 bytes gives a shuffle that can be +// implemented using P. If so, set Transform to a VPERM-like permute vector +// that, when applied to the result of P, gives the original permute in Bytes. +static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, + const Permute &P, + SmallVectorImpl<int> &Transform) { + unsigned To = 0; + for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { + int Elt = Bytes[From]; + if (Elt < 0) + // Byte number From of the result is undefined. + Transform[From] = -1; + else { + while (P.Bytes[To] != Elt) { + To += 1; + if (To == SystemZ::VectorBytes) + return false; + } + Transform[From] = To; + } + } + return true; +} + +// As above, but search for a matching permute. +static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, + SmallVectorImpl<int> &Transform) { + for (auto &P : PermuteForms) + if (matchDoublePermute(Bytes, P, Transform)) + return &P; + return nullptr; +} + +// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, +// as if it had type vNi8. 
+static void getVPermMask(ShuffleVectorSDNode *VSN, + SmallVectorImpl<int> &Bytes) { + EVT VT = VSN->getValueType(0); + unsigned NumElements = VT.getVectorNumElements(); + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + Bytes.resize(NumElements * BytesPerElement, -1); + for (unsigned I = 0; I < NumElements; ++I) { + int Index = VSN->getMaskElt(I); + if (Index >= 0) + for (unsigned J = 0; J < BytesPerElement; ++J) + Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; + } +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of +// the result come from a contiguous sequence of bytes from one input. +// Set Base to the selector for the first byte if so. +static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, + unsigned BytesPerElement, int &Base) { + Base = -1; + for (unsigned I = 0; I < BytesPerElement; ++I) { + if (Bytes[Start + I] >= 0) { + unsigned Elem = Bytes[Start + I]; + if (Base < 0) { + Base = Elem - I; + // Make sure the bytes would come from one input operand. + if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) + return false; + } else if (unsigned(Base) != Elem - I) + return false; + } + } + return true; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Return true if it can be performed using VSLDI. +// When returning true, set StartIndex to the shift amount and OpNo0 +// and OpNo1 to the VPERM operands that should be used as the first +// and second shift operand respectively. 
+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, + unsigned &StartIndex, unsigned &OpNo0, + unsigned &OpNo1) { + int OpNos[] = { -1, -1 }; + int Shift = -1; + for (unsigned I = 0; I < 16; ++I) { + int Index = Bytes[I]; + if (Index >= 0) { + int ExpectedShift = (Index - I) % SystemZ::VectorBytes; + int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; + int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; + if (Shift < 0) + Shift = ExpectedShift; + else if (Shift != ExpectedShift) + return false; + // Make sure that the operand mappings are consistent with previous + // elements. + if (OpNos[ModelOpNo] == 1 - RealOpNo) + return false; + OpNos[ModelOpNo] = RealOpNo; + } + } + StartIndex = Shift; + return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); +} + +// Create a node that performs P on operands Op0 and Op1, casting the +// operands to the appropriate type. The type of the result is determined by P. +static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, + const Permute &P, SDValue Op0, SDValue Op1) { + // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input + // elements of a PACK are twice as wide as the outputs. + unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : + P.Opcode == SystemZISD::PACK ? P.Operand * 2 : + P.Operand); + // Cast both operands to the appropriate type. 
+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), + SystemZ::VectorBytes / InBytes); + Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); + SDValue Op; + if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { + SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); + Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); + } else if (P.Opcode == SystemZISD::PACK) { + MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), + SystemZ::VectorBytes / P.Operand); + Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); + } else { + Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); + } + return Op; +} + +// Bytes is a VPERM-like permute vector, except that -1 is used for +// undefined bytes. Implement it on operands Ops[0] and Ops[1] using +// VSLDI or VPERM. +static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, + const SmallVectorImpl<int> &Bytes) { + for (unsigned I = 0; I < 2; ++I) + Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); + + // First see whether VSLDI can be used. + unsigned StartIndex, OpNo0, OpNo1; + if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) + return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], + Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); + + // Fall back on VPERM. Construct an SDNode for the permute vector. + SDValue IndexNodes[SystemZ::VectorBytes]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= 0) + IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); + else + IndexNodes[I] = DAG.getUNDEF(MVT::i32); + SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); +} + +namespace { +// Describes a general N-operand vector shuffle. 
+struct GeneralShuffle { + GeneralShuffle(EVT vt) : VT(vt) {} + void addUndef(); + void add(SDValue, unsigned); + SDValue getNode(SelectionDAG &, SDLoc); + + // The operands of the shuffle. + SmallVector<SDValue, SystemZ::VectorBytes> Ops; + + // Index I is -1 if byte I of the result is undefined. Otherwise the + // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand + // Bytes[I] / SystemZ::VectorBytes. + SmallVector<int, SystemZ::VectorBytes> Bytes; + + // The type of the shuffle result. + EVT VT; +}; +} + +// Add an extra undefined element to the shuffle. +void GeneralShuffle::addUndef() { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(-1); +} + +// Add an extra element to the shuffle, taking it from element Elem of Op. +// A null Op indicates a vector input whose value will be calculated later; +// there is at most one such input per shuffle and it always has the same +// type as the result. +void GeneralShuffle::add(SDValue Op, unsigned Elem) { + unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); + + // The source vector can have wider elements than the result, + // either through an explicit TRUNCATE or because of type legalization. + // We want the least significant part. + EVT FromVT = Op.getNode() ? Op.getValueType() : VT; + unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); + assert(FromBytesPerElement >= BytesPerElement && + "Invalid EXTRACT_VECTOR_ELT"); + unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + + (FromBytesPerElement - BytesPerElement)); + + // Look through things like shuffles and bitcasts. + while (Op.getNode()) { + if (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { + // See whether the bytes we need come from a contiguous part of one + // operand. 
+ SmallVector<int, SystemZ::VectorBytes> OpBytes; + getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes); + int NewByte; + if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) + break; + if (NewByte < 0) { + addUndef(); + return; + } + Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); + Byte = unsigned(NewByte) % SystemZ::VectorBytes; + } else if (Op.getOpcode() == ISD::UNDEF) { + addUndef(); + return; + } else + break; + } + + // Make sure that the source of the extraction is in Ops. + unsigned OpNo = 0; + for (; OpNo < Ops.size(); ++OpNo) + if (Ops[OpNo] == Op) + break; + if (OpNo == Ops.size()) + Ops.push_back(Op); + + // Add the element to Bytes. + unsigned Base = OpNo * SystemZ::VectorBytes + Byte; + for (unsigned I = 0; I < BytesPerElement; ++I) + Bytes.push_back(Base + I); +} + +// Return SDNodes for the completed shuffle. +SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { + assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); + + if (Ops.size() == 0) + return DAG.getUNDEF(VT); + + // Make sure that there are at least two shuffle operands. + if (Ops.size() == 1) + Ops.push_back(DAG.getUNDEF(MVT::v16i8)); + + // Create a tree of shuffles, deferring root node until after the loop. + // Try to redistribute the undefined elements of non-root nodes so that + // the non-root shuffles match something like a pack or merge, then adjust + // the parent node's permute vector to compensate for the new order. + // Among other things, this copes with vectors like <2 x i16> that were + // padded with undefined elements during type legalization. + // + // In the best case this redistribution will lead to the whole tree + // using packs and merges. It should rarely be a loss in other cases. + unsigned Stride = 1; + for (; Stride * 2 < Ops.size(); Stride *= 2) { + for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { + SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; + + // Create a mask for just these two operands. 
+ SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; + unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; + if (OpNo == I) + NewBytes[J] = Byte; + else if (OpNo == I + Stride) + NewBytes[J] = SystemZ::VectorBytes + Byte; + else + NewBytes[J] = -1; + } + // See if it would be better to reorganize NewBytes to avoid using VPERM. + SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); + if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { + Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); + // Applying NewBytesMap to Ops[I] gets back to NewBytes. + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { + if (NewBytes[J] >= 0) { + assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && + "Invalid double permute"); + Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; + } else + assert(NewBytesMap[J] < 0 && "Invalid double permute"); + } + } else { + // Just use NewBytes on the operands. + Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); + for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) + if (NewBytes[J] >= 0) + Bytes[J] = I * SystemZ::VectorBytes + J; + } + } + } + + // Now we just have 2 inputs. Put the second operand in Ops[1]. + if (Stride > 1) { + Ops[1] = Ops[Stride]; + for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) + if (Bytes[I] >= int(SystemZ::VectorBytes)) + Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; + } + + // Look for an instruction that can do the permute without resorting + // to VPERM. + unsigned OpNo0, OpNo1; + SDValue Op; + if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) + Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); + else + Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + +// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. 
+static bool isScalarToVector(SDValue Op) { + for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) + if (Op.getOperand(I).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +// Return a vector of type VT that contains Value in the first element. +// The other elements don't matter. +static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Value) { + // If we have a constant, replicate it to all elements and let the + // BUILD_VECTOR lowering take care of it. + if (Value.getOpcode() == ISD::Constant || + Value.getOpcode() == ISD::ConstantFP) { + SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + if (Value.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); +} + +// Return a vector of type VT in which Op0 is in element 0 and Op1 is in +// element 1. Used for cases in which replication is cheap. +static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, + SDValue Op0, SDValue Op1) { + if (Op0.getOpcode() == ISD::UNDEF) { + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); + } + if (Op1.getOpcode() == ISD::UNDEF) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); + return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, + buildScalarToVector(DAG, DL, VT, Op0), + buildScalarToVector(DAG, DL, VT, Op1)); +} + +// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 +// vector for them. +static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, + SDValue Op1) { + if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(MVT::v2i64); + // If one of the two inputs is undefined then replicate the other one, + // in order to avoid using another register unnecessarily. 
+ if (Op0.getOpcode() == ISD::UNDEF) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + else if (Op1.getOpcode() == ISD::UNDEF) + Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + else { + Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); + } + return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); +} + +// Try to represent constant BUILD_VECTOR node BVN using a +// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask +// on success. +static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { + EVT ElemVT = BVN->getValueType(0).getVectorElementType(); + unsigned BytesPerElement = ElemVT.getStoreSize(); + for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { + SDValue Op = BVN->getOperand(I); + if (Op.getOpcode() != ISD::UNDEF) { + uint64_t Value; + if (Op.getOpcode() == ISD::Constant) + Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue(); + else if (Op.getOpcode() == ISD::ConstantFP) + Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() + .getZExtValue()); + else + return false; + for (unsigned J = 0; J < BytesPerElement; ++J) { + uint64_t Byte = (Value >> (J * 8)) & 0xff; + if (Byte == 0xff) + Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); + else if (Byte != 0) + return false; + } + } + } + return true; +} + +// Try to load a vector constant in which BitsPerElement-bit value Value +// is replicated to fill the vector. VT is the type of the resulting +// constant, which may have elements of a different size from BitsPerElement. +// Return the SDValue of the constant on success, otherwise return +// an empty value. +static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, + const SystemZInstrInfo *TII, + SDLoc DL, EVT VT, uint64_t Value, + unsigned BitsPerElement) { + // Signed 16-bit values can be replicated using VREPI. 
+ int64_t SignedValue = SignExtend64(Value, BitsPerElement); + if (isInt<16>(SignedValue)) { + MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), + SystemZ::VectorBits / BitsPerElement); + SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, + DAG.getConstant(SignedValue, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + // See whether rotating the constant left some N places gives a value that + // is one less than a power of 2 (i.e. all zeros followed by all ones). + // If so we can use VGM. + unsigned Start, End; + if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, + // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to + // bit numbers for a BitsPerElement-bit value, so that 0 denotes + // 1 << (BitsPerElement-1). + Start -= 64 - BitsPerElement; + End -= 64 - BitsPerElement; + MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), + SystemZ::VectorBits / BitsPerElement); + SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, + DAG.getConstant(Start, DL, MVT::i32), + DAG.getConstant(End, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + return SDValue(); +} + +// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually +// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for +// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR +// would benefit from this representation and return it if so. +static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, + BuildVectorSDNode *BVN) { + EVT VT = BVN->getValueType(0); + unsigned NumElements = VT.getVectorNumElements(); + + // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation + // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still + // need a BUILD_VECTOR, add an additional placeholder operand for that + // BUILD_VECTOR and store its operands in ResidueOps. 
+ GeneralShuffle GS(VT); + SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; + bool FoundOne = false; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Op = BVN->getOperand(I); + if (Op.getOpcode() == ISD::TRUNCATE) + Op = Op.getOperand(0); + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op.getOperand(1).getOpcode() == ISD::Constant) { + unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + GS.add(Op.getOperand(0), Elem); + FoundOne = true; + } else if (Op.getOpcode() == ISD::UNDEF) { + GS.addUndef(); + } else { + GS.add(SDValue(), ResidueOps.size()); + ResidueOps.push_back(Op); + } + } + + // Nothing to do if there are no EXTRACT_VECTOR_ELTs. + if (!FoundOne) + return SDValue(); + + // Create the BUILD_VECTOR for the remaining elements, if any. + if (!ResidueOps.empty()) { + while (ResidueOps.size() < NumElements) + ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType())); + for (auto &Op : GS.Ops) { + if (!Op.getNode()) { + Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); + break; + } + } + } + return GS.getNode(DAG, SDLoc(BVN)); +} + +// Combine GPR scalar values Elems into a vector of type VT. +static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) { + // See whether there is a single replicated value. + SDValue Single; + unsigned int NumElements = Elems.size(); + unsigned int Count = 0; + for (auto Elem : Elems) { + if (Elem.getOpcode() != ISD::UNDEF) { + if (!Single.getNode()) + Single = Elem; + else if (Elem != Single) { + Single = SDValue(); + break; + } + Count += 1; + } + } + // There are three cases here: + // + // - if the only defined element is a loaded one, the best sequence + // is a replicating load. + // + // - otherwise, if the only defined element is an i64 value, we will + // end up with the same VLVGP sequence regardless of whether we short-cut + // for replication or fall through to the later code. 
+ // + // - otherwise, if the only defined element is an i32 or smaller value, + // we would need 2 instructions to replicate it: VLVGP followed by VREPx. + // This is only a win if the single defined element is used more than once. + // In other cases we're better off using a single VLVGx. + if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); + + // The best way of building a v2i64 from two i64s is to use VLVGP. + if (VT == MVT::v2i64) + return joinDwords(DAG, DL, Elems[0], Elems[1]); + + // Use a 64-bit merge high to combine two doubles. + if (VT == MVT::v2f64) + return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + + // Build v4f32 values directly from the FPRs: + // + // <Axxx> <Bxxx> <Cxxx> <Dxxx> + // V V VMRHF + // <ABxx> <CDxx> + // V VMRHG + // <ABCD> + if (VT == MVT::v4f32) { + SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); + SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); + // Avoid unnecessary undefs by reusing the other operand. + if (Op01.getOpcode() == ISD::UNDEF) + Op01 = Op23; + else if (Op23.getOpcode() == ISD::UNDEF) + Op23 = Op01; + // Merging identical replications is a no-op. + if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) + return Op01; + Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); + Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); + SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, + DL, MVT::v2i64, Op01, Op23); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + + // Collect the constant terms. 
+ SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); + SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); + + unsigned NumConstants = 0; + for (unsigned I = 0; I < NumElements; ++I) { + SDValue Elem = Elems[I]; + if (Elem.getOpcode() == ISD::Constant || + Elem.getOpcode() == ISD::ConstantFP) { + NumConstants += 1; + Constants[I] = Elem; + Done[I] = true; + } + } + // If there was at least one constant, fill in the other elements of + // Constants with undefs to get a full vector constant and use that + // as the starting point. + SDValue Result; + if (NumConstants > 0) { + for (unsigned I = 0; I < NumElements; ++I) + if (!Constants[I].getNode()) + Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants); + } else { + // Otherwise try to use VLVGP to start the sequence in order to + // avoid a false dependency on any previous contents of the vector + // register. This only makes sense if one of the associated elements + // is defined. + unsigned I1 = NumElements / 2 - 1; + unsigned I2 = NumElements - 1; + bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF); + bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF); + if (Def1 || Def2) { + SDValue Elem1 = Elems[Def1 ? I1 : I2]; + SDValue Elem2 = Elems[Def2 ? I2 : I1]; + Result = DAG.getNode(ISD::BITCAST, DL, VT, + joinDwords(DAG, DL, Elem1, Elem2)); + Done[I1] = true; + Done[I2] = true; + } else + Result = DAG.getUNDEF(VT); + } + + // Use VLVGx to insert the other elements. 
+ for (unsigned I = 0; I < NumElements; ++I) + if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF) + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], + DAG.getConstant(I, DL, MVT::i32)); + return Result; +} + +SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + if (BVN->isConstant()) { + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + uint64_t Mask = 0; + if (tryBuildVectorByteMask(BVN, Mask)) { + SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, + DAG.getConstant(Mask, DL, MVT::i32)); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + + // Try using some form of replication. + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, true) && + SplatBitSize <= 64) { + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 
+ uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = (SplatUndefZ + & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = (SplatUndefZ + & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + uint64_t Value = SplatBitsZ | Upper | Lower; + SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, + SplatBitSize); + if (Op.getNode()) + return Op; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + Value = SplatBitsZ | Middle; + Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); + if (Op.getNode()) + return Op; + } + + // Fall back to loading it from memory. + return SDValue(); + } + + // See if we should use shuffles to construct the vector from other vectors. + SDValue Res = tryBuildVectorShuffle(DAG, BVN); + if (Res.getNode()) + return Res; + + // Detect SCALAR_TO_VECTOR conversions. + if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) + return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); + + // Otherwise use buildVector to build the vector up from GPRs. 
+ unsigned NumElements = Op.getNumOperands(); + SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + Ops[I] = Op.getOperand(I); + return buildVector(DAG, DL, VT, Ops); +} + +SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned NumElements = VT.getVectorNumElements(); + + if (VSN->isSplat()) { + SDValue Op0 = Op.getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + // See whether the value we're splatting is directly available as a scalar. + if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + Op0.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); + // Otherwise keep it as a vector-to-vector operation. + return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), + DAG.getConstant(Index, DL, MVT::i32)); + } + + GeneralShuffle GS(VT); + for (unsigned I = 0; I < NumElements; ++I) { + int Elt = VSN->getMaskElt(I); + if (Elt < 0) + GS.addUndef(); + else + GS.add(Op.getOperand(unsigned(Elt) / NumElements), + unsigned(Elt) % NumElements); + } + return GS.getNode(DAG, SDLoc(VSN)); +} + +SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + // Just insert the scalar into element 0 of an undefined vector. + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, + Op.getValueType(), DAG.getUNDEF(Op.getValueType()), + Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); +} + +SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle insertions of floating-point values. 
+ SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + EVT VT = Op.getValueType(); + + // Insertions into constant indices of a v2f64 can be done using VPDI. + // However, if the inserted value is a bitcast or a constant then it's + // better to use GPRs, as below. + if (VT == MVT::v2f64 && + Op1.getOpcode() != ISD::BITCAST && + Op1.getOpcode() != ISD::ConstantFP && + Op2.getOpcode() == ISD::Constant) { + uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Mask = VT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and insert via a GPR. + MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), + DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + // Handle extractions of floating-point values. + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + EVT VecVT = Op0.getValueType(); + + // Extractions of constant indices can be done directly. + if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { + uint64_t Index = CIndexN->getZExtValue(); + unsigned Mask = VecVT.getVectorNumElements() - 1; + if (Index <= Mask) + return Op; + } + + // Otherwise bitcast to the equivalent integer form and extract via a GPR. 
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); + MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, + DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); + return DAG.getNode(ISD::BITCAST, DL, VT, Res); +} + +SDValue +SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, + unsigned UnpackHigh) const { + SDValue PackedOp = Op.getOperand(0); + EVT OutVT = Op.getValueType(); + EVT InVT = PackedOp.getValueType(); + unsigned ToBits = OutVT.getVectorElementType().getSizeInBits(); + unsigned FromBits = InVT.getVectorElementType().getSizeInBits(); + do { + FromBits *= 2; + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), + SystemZ::VectorBits / FromBits); + PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); + } while (FromBits != ToBits); + return PackedOp; +} + +SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, + unsigned ByScalar) const { + // Look for cases where a vector shift can use the *_BY_SCALAR form. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); + + // See whether the shift vector is a splat represented as BUILD_VECTOR. + if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + // Check for constant splats. Use ElemBitSize as the minimum element + // width and reject splats that need wider elements. + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + ElemBitSize, true) && + SplatBitSize == ElemBitSize) { + SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, + DL, MVT::i32); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + // Check for variable splats. 
+ BitVector UndefElements; + SDValue Splat = BVN->getSplatValue(&UndefElements); + if (Splat) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + + // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, + // and the shift amount is directly available in a GPR. + if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) { + if (VSN->isSplat()) { + SDValue VSNOp0 = VSN->getOperand(0); + unsigned Index = VSN->getSplatIndex(); + assert(Index < VT.getVectorNumElements() && + "Splat index should be defined and in first operand"); + if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || + VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { + // Since i32 is the smallest legal type, we either need a no-op + // or a truncation. + SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, + VSNOp0.getOperand(Index)); + return DAG.getNode(ByScalar, DL, VT, Op0, Shift); + } + } + } + + // Otherwise just treat the current form as legal. 
+ return Op; +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2437,6 +4296,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); + case ISD::CTLZ_ZERO_UNDEF: + return DAG.getNode(ISD::CTLZ, SDLoc(Op), + Op.getValueType(), Op.getOperand(0)); + case ISD::CTTZ_ZERO_UNDEF: + return DAG.getNode(ISD::CTTZ, SDLoc(Op), + Op.getValueType(), Op.getOperand(0)); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2471,6 +4338,30 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return lowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); + case ISD::SHL: + return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); + case ISD::SRL: + return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); + case ISD::SRA: + return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); default: llvm_unreachable("Unexpected node to lower"); } @@ -2478,10 +4369,13 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { #define OPCODE(NAME) 
case SystemZISD::NAME: return "SystemZISD::" #NAME - switch (Opcode) { + switch ((SystemZISD::NodeType)Opcode) { + case SystemZISD::FIRST_NUMBER: break; OPCODE(RET_FLAG); OPCODE(CALL); OPCODE(SIBCALL); + OPCODE(TLS_GDCALL); + OPCODE(TLS_LDCALL); OPCODE(PCREL_WRAPPER); OPCODE(PCREL_OFFSET); OPCODE(IABS); @@ -2492,7 +4386,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(EXTRACT_ACCESS); + OPCODE(POPCNT); OPCODE(UMUL_LOHI64); + OPCODE(SDIVREM32); OPCODE(SDIVREM64); OPCODE(UDIVREM32); OPCODE(UDIVREM64); @@ -2506,11 +4402,60 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(XC_LOOP); OPCODE(CLC); OPCODE(CLC_LOOP); - OPCODE(STRCMP); OPCODE(STPCPY); + OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); + OPCODE(BYTE_MASK); + OPCODE(ROTATE_MASK); + OPCODE(REPLICATE); + OPCODE(JOIN_DWORDS); + OPCODE(SPLAT); + OPCODE(MERGE_HIGH); + OPCODE(MERGE_LOW); + OPCODE(SHL_DOUBLE); + OPCODE(PERMUTE_DWORDS); + OPCODE(PERMUTE); + OPCODE(PACK); + OPCODE(PACKS_CC); + OPCODE(PACKLS_CC); + OPCODE(UNPACK_HIGH); + OPCODE(UNPACKL_HIGH); + OPCODE(UNPACK_LOW); + OPCODE(UNPACKL_LOW); + OPCODE(VSHL_BY_SCALAR); + OPCODE(VSRL_BY_SCALAR); + OPCODE(VSRA_BY_SCALAR); + OPCODE(VSUM); + OPCODE(VICMPE); + OPCODE(VICMPH); + OPCODE(VICMPHL); + OPCODE(VICMPES); + OPCODE(VICMPHS); + OPCODE(VICMPHLS); + OPCODE(VFCMPE); + OPCODE(VFCMPH); + OPCODE(VFCMPHE); + OPCODE(VFCMPES); + OPCODE(VFCMPHS); + OPCODE(VFCMPHES); + OPCODE(VFTCI); + OPCODE(VEXTEND); + OPCODE(VROUND); + OPCODE(VTM); + OPCODE(VFAE_CC); + OPCODE(VFAEZ_CC); + OPCODE(VFEE_CC); + OPCODE(VFEEZ_CC); + OPCODE(VFENE_CC); + OPCODE(VFENEZ_CC); + OPCODE(VISTR_CC); + OPCODE(VSTRC_CC); + OPCODE(VSTRCZ_CC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -2529,6 +4474,157 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned 
Opcode) const { #undef OPCODE } +// Return true if VT is a vector whose elements are a whole number of bytes +// in width. +static bool canTreatAsByteVector(EVT VT) { + return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0; +} + +// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT +// producing a result of type ResVT. Op is a possibly bitcast version +// of the input vector and Index is the index (based on type VecVT) that +// should be extracted. Return the new extraction if a simplification +// was possible or if Force is true. +SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, + SDValue Op, unsigned Index, + DAGCombinerInfo &DCI, + bool Force) const { + SelectionDAG &DAG = DCI.DAG; + + // The number of bytes being extracted. + unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + + for (;;) { + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::BITCAST) + // Look through bitcasts. + Op = Op.getOperand(0); + else if (Opcode == ISD::VECTOR_SHUFFLE && + canTreatAsByteVector(Op.getValueType())) { + // Get a VPERM-like permute mask and see whether the bytes covered + // by the extracted element are a contiguous sequence from one + // source operand. + SmallVector<int, SystemZ::VectorBytes> Bytes; + getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes); + int First; + if (!getShuffleInput(Bytes, Index * BytesPerElement, + BytesPerElement, First)) + break; + if (First < 0) + return DAG.getUNDEF(ResVT); + // Make sure the contiguous sequence starts at a multiple of the + // original element size. + unsigned Byte = unsigned(First) % Bytes.size(); + if (Byte % BytesPerElement != 0) + break; + // We can get the extracted value directly from an input. 
+ Index = Byte / BytesPerElement; + Op = Op.getOperand(unsigned(First) / Bytes.size()); + Force = true; + } else if (Opcode == ISD::BUILD_VECTOR && + canTreatAsByteVector(Op.getValueType())) { + // We can only optimize this case if the BUILD_VECTOR elements are + // at least as wide as the extracted value. + EVT OpVT = Op.getValueType(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + if (OpBytesPerElement < BytesPerElement) + break; + // Make sure that the least-significant bit of the extracted value + // is the least significant bit of an input. + unsigned End = (Index + 1) * BytesPerElement; + if (End % OpBytesPerElement != 0) + break; + // We're extracting the low part of one operand of the BUILD_VECTOR. + Op = Op.getOperand(End / OpBytesPerElement - 1); + if (!Op.getValueType().isInteger()) { + EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + DCI.AddToWorklist(Op.getNode()); + } + EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + if (VT != ResVT) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); + } + return Op; + } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + canTreatAsByteVector(Op.getValueType()) && + canTreatAsByteVector(Op.getOperand(0).getValueType())) { + // Make sure that only the unextended bits are significant. 
+ EVT ExtVT = Op.getValueType(); + EVT OpVT = Op.getOperand(0).getValueType(); + unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); + unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); + unsigned Byte = Index * BytesPerElement; + unsigned SubByte = Byte % ExtBytesPerElement; + unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; + if (SubByte < MinSubByte || + SubByte + BytesPerElement > ExtBytesPerElement) + break; + // Get the byte offset of the unextended element + Byte = Byte / ExtBytesPerElement * OpBytesPerElement; + // ...then add the byte offset relative to that element. + Byte += SubByte - MinSubByte; + if (Byte % BytesPerElement != 0) + break; + Op = Op.getOperand(0); + Index = Byte / BytesPerElement; + Force = true; + } else + break; + } + if (Force) { + if (Op.getValueType() != VecVT) { + Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); + DCI.AddToWorklist(Op.getNode()); + } + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, + DAG.getConstant(Index, DL, MVT::i32)); + } + return SDValue(); +} + +// Optimize vector operations in scalar value Op on the basis that Op +// is truncated to TruncVT. +SDValue +SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, + DAGCombinerInfo &DCI) const { + // If we have (trunc (extract_vector_elt X, Y)), try to turn it into + // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements + // of type TruncVT. + if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + TruncVT.getSizeInBits() % 8 == 0) { + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + if (canTreatAsByteVector(VecVT)) { + if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); + unsigned TruncBytes = TruncVT.getStoreSize(); + if (BytesPerElement % TruncBytes == 0) { + // Calculate the value of Y' in the above description. 
We are + // splitting the original elements into Scale equal-sized pieces + // and for truncation purposes want the last (least-significant) + // of these pieces for IndexN. This is easiest to do by calculating + // the start index of the following element and then subtracting 1. + unsigned Scale = BytesPerElement / TruncBytes; + unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; + + // Defer the creation of the bitcast from X to combineExtract, + // which might be able to optimize the extraction. + VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), + VecVT.getStoreSize() / TruncBytes); + EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); + return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); + } + } + } + } + return SDValue(); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -2552,9 +4648,118 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, Inner.getOperand(0)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, - DAG.getConstant(NewShlAmt, ShiftVT)); + DAG.getConstant(NewShlAmt, SDLoc(Inner), + ShiftVT)); return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, - DAG.getConstant(NewSraAmt, ShiftVT)); + DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); + } + } + } + } + if (Opcode == SystemZISD::MERGE_HIGH || + Opcode == SystemZISD::MERGE_LOW) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op0.getOpcode() == SystemZISD::BYTE_MASK && + cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF + // for v4f32. + if (Op1 == N->getOperand(0)) + return Op1; + // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 
+ EVT VT = Op1.getValueType(); + unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); + if (ElemBytes <= 4) { + Opcode = (Opcode == SystemZISD::MERGE_HIGH ? + SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); + EVT InVT = VT.changeVectorElementTypeToInteger(); + EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), + SystemZ::VectorBytes / ElemBytes / 2); + if (VT != InVT) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); + DCI.AddToWorklist(Op.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + } + } + } + // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better + // for the extraction to be done on a vMiN value, so that we can use VSTE. + // If X has wider elements then convert it to: + // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). + if (Opcode == ISD::STORE) { + auto *SN = cast<StoreSDNode>(N); + EVT MemVT = SN->getMemoryVT(); + if (MemVT.isInteger()) { + SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, + SN->getValue(), DCI); + if (Value.getNode()) { + DCI.AddToWorklist(Value.getNode()); + + // Rewrite the store with the new form of stored value. + return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, + SN->getBasePtr(), SN->getMemoryVT(), + SN->getMemOperand()); + } + } + } + // Try to simplify a vector extraction. 
+ if (Opcode == ISD::EXTRACT_VECTOR_ELT) { + if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = Op0.getValueType(); + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); + } + } + // (join_dwords X, X) == (replicate X) + if (Opcode == SystemZISD::JOIN_DWORDS && + N->getOperand(0) == N->getOperand(1)) + return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), + N->getOperand(0)); + // (fround (extract_vector_elt X 0)) + // (fround (extract_vector_elt X 1)) -> + // (extract_vector_elt (VROUND X) 0) + // (extract_vector_elt (VROUND X) 1) + // + // This is a special case since the target doesn't really support v2f32s. + if (Opcode == ISD::FP_ROUND) { + SDValue Op0 = N->getOperand(0); + if (N->getValueType(0) == MVT::f32 && + Op0.hasOneUse() && + Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0).getValueType() == MVT::v2f64 && + Op0.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + SDValue Vec = Op0.getOperand(0); + for (auto *U : Vec->uses()) { + if (U != Op0.getNode() && + U->hasOneUse() && + U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + U->getOperand(0) == Vec && + U->getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { + SDValue OtherRound = SDValue(*U->use_begin(), 0); + if (OtherRound.getOpcode() == ISD::FP_ROUND && + OtherRound.getOperand(0) == SDValue(U, 0) && + OtherRound.getValueType() == MVT::f32) { + SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); + DCI.AddToWorklist(VRound.getNode()); + SDValue Extract1 = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, + VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); + DCI.AddToWorklist(Extract1.getNode()); + DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + SDValue Extract0 = + 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, + VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + return Extract0; + } } } } @@ -2614,8 +4819,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, MachineBasicBlock *MBB) const { - const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>( - MBB->getParent()->getSubtarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); @@ -2663,8 +4868,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { - const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>( - MBB->getParent()->getSubtarget().getInstrInfo()); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); unsigned SrcReg = MI->getOperand(0).getReg(); MachineOperand Base = MI->getOperand(1); @@ -2733,7 +4938,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2853,7 +5058,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2965,7 +5170,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, 
MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. @@ -3082,7 +5287,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI, bool ClearEven, unsigned SubReg) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3114,7 +5319,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3284,7 +5489,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3338,6 +5543,57 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. 
+MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR/VR clobbers. 
+ if (!NoFloat && (Control & 4) != 0) { + if (Subtarget.hasVector()) { + for (int I = 0; I < 32; I++) { + unsigned Reg = SystemZMC::VR128Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } else { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3579,6 +5835,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 887c236f1e78f..b001abc693d6f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -22,7 +22,7 @@ namespace llvm { namespace SystemZISD { -enum { +enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, // Return with a flag operand. Operand 0 is the chain operand. @@ -34,6 +34,11 @@ enum { CALL, SIBCALL, + // TLS calls. Like regular calls, except operand 1 is the TLS symbol. + // (The call target is implicitly __tls_get_offset.) + TLS_GDCALL, + TLS_LDCALL, + // Wraps a TargetGlobalAddress that should be loaded using PC-relative // accesses (LARL). Operand 0 is the address. PCREL_WRAPPER, @@ -82,6 +87,9 @@ enum { // the number of the register. EXTRACT_ACCESS, + // Count number of bits set in operand 0 per byte. 
+ POPCNT, + // Wrappers around the ISD opcodes of the same name. The output and // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. @@ -138,6 +146,135 @@ enum { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns chain and glue. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns chain and glue. + TEND, + + // Create a vector constant by filling byte N of the result with bit + // 15-N of the single operand. + BYTE_MASK, + + // Create a vector constant by replicating an element-sized RISBG-style mask. + // The first operand specifies the starting set bit and the second operand + // specifies the ending set bit. Both operands count from the MSB of the + // element. + ROTATE_MASK, + + // Replicate a GPR scalar value into all elements of a vector. + REPLICATE, + + // Create a vector from two i64 GPRs. + JOIN_DWORDS, + + // Replicate one element of a vector into all elements. The first operand + // is the vector and the second is the index of the element to replicate. + SPLAT, + + // Interleave elements from the high half of operand 0 and the high half + // of operand 1. + MERGE_HIGH, + + // Likewise for the low halves. + MERGE_LOW, + + // Concatenate the vectors in the first two operands, shift them left + // by the third operand, and take the first half of the result. + SHL_DOUBLE, + + // Take one element of the first v2i64 operand and the one element of + // the second v2i64 operand and concatenate them to form a v2i64 result. + // The third operand is a 4-bit value of the form 0A0B, where A and B + // are the element selectors for the first operand and second operands + // respectively. + PERMUTE_DWORDS, + + // Perform a general vector permute on vector operands 0 and 1. 
+ // Each byte of operand 2 controls the corresponding byte of the result, + // in the same way as a byte-level VECTOR_SHUFFLE mask. + PERMUTE, + + // Pack vector operands 0 and 1 into a single vector with half-sized elements. + PACK, + + // Likewise, but saturate the result and set CC. PACKS_CC does signed + // saturation and PACKLS_CC does unsigned saturation. + PACKS_CC, + PACKLS_CC, + + // Unpack the first half of vector operand 0 into double-sized elements. + // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. + UNPACK_HIGH, + UNPACKL_HIGH, + + // Likewise for the second half. + UNPACK_LOW, + UNPACKL_LOW, + + // Shift each element of vector operand 0 by the number of bits specified + // by scalar operand 1. + VSHL_BY_SCALAR, + VSRL_BY_SCALAR, + VSRA_BY_SCALAR, + + // For each element of the output type, sum across all sub-elements of + // operand 0 belonging to the corresponding element, and add in the + // rightmost sub-element of the corresponding element of operand 1. + VSUM, + + // Compare integer vector operands 0 and 1 to produce the usual 0/-1 + // vector result. VICMPE is for equality, VICMPH for "signed greater than" + // and VICMPHL for "unsigned greater than". + VICMPE, + VICMPH, + VICMPHL, + + // Likewise, but also set the condition codes on the result. + VICMPES, + VICMPHS, + VICMPHLS, + + // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 + // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and + // greater than" and VFCMPHE for "ordered and greater than or equal to". + VFCMPE, + VFCMPH, + VFCMPHE, + + // Likewise, but also set the condition codes on the result. + VFCMPES, + VFCMPHS, + VFCMPHES, + + // Test floating-point data class for vectors. + VFTCI, + + // Extend the even f32 elements of vector operand 0 to produce a vector + // of f64 elements. + VEXTEND, + + // Round the f64 elements of vector operand 0 to f32s and store them in the + // even elements of the result. 
+ VROUND, + + // AND the two vector operands together and set CC based on the result. + VTM, + + // String operations that set CC as a side-effect. + VFAE_CC, + VFAEZ_CC, + VFEE_CC, + VFEEZ_CC, + VFENE_CC, + VFENEZ_CC, + VISTR_CC, + VSTRC_CC, + VSTRCZ_CC, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -198,15 +335,40 @@ class SystemZTargetMachine; class SystemZTargetLowering : public TargetLowering { public: - explicit SystemZTargetLowering(const TargetMachine &TM); + explicit SystemZTargetLowering(const TargetMachine &TM, + const SystemZSubtarget &STI); // Override TargetLowering. MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getVectorIdxTy() const override { + // Only the lower 12 bits of an element index are used, so we don't + // want to clobber the upper 32 bits of a GPR unnecessarily. + return MVT::i32; + } + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) + const override { + // Widen subvectors to the full width rather than promoting integer + // elements. This is better because: + // + // (a) it means that we can handle the ABI for passing and returning + // sub-128 vectors without having to handle them as legal types. + // + // (b) we don't have instructions to extend on load and truncate on store, + // so promoting the integers is less efficient. + // + // (c) there are no multiplication instructions for the widest integer + // type (v2i64). 
+ if (VT.getVectorElementType().getSizeInBits() % 8 == 0) + return TypeWidenVector; + return TargetLoweringBase::getPreferredVectorAction(VT); + } EVT getSetCCResultType(LLVMContext &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -215,8 +377,9 @@ public: bool isTruncateFree(EVT, EVT) const override; const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const override; + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const override; TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const override; TargetLowering::ConstraintWeight @@ -226,6 +389,26 @@ public: std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + if (ConstraintCode.size() == 1) { + switch(ConstraintCode[0]) { + default: + break; + case 'Q': + return InlineAsm::Constraint_Q; + case 'R': + return InlineAsm::Constraint_R; + case 'S': + return InlineAsm::Constraint_S; + case 'T': + return InlineAsm::Constraint_T; + } + } + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; @@ -257,6 +440,9 @@ private: SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const; + SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, + 
SelectionDAG &DAG, unsigned Opcode, + SDValue GOTOffset) const; SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const; SDValue lowerBlockAddress(BlockAddressSDNode *Node, @@ -272,6 +458,7 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, @@ -282,6 +469,22 @@ private: SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, + unsigned UnpackHigh) const; + SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + + SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, + unsigned Index, DAGCombinerInfo &DCI, + bool Force) const; + SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, + DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. 
@@ -319,6 +522,10 @@ private: MachineBasicBlock *emitStringWrapper(MachineInstr *MI, MachineBasicBlock *BB, unsigned Opcode) const; + MachineBasicBlock *emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 4a5582fbf4e25..27fbd7df2882e 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } -defm : CompareZeroFP<LTEBRCompare, FP32>; -defm : CompareZeroFP<LTDBRCompare, FP64>; -defm : CompareZeroFP<LTXBRCompare, FP128>; +// Note that the comparison against zero operation is not available if we +// have vector support, since load-and-test instructions will partially +// clobber the target (vector) register. +let Predicates = [FeatureNoVector] in { + defm : CompareZeroFP<LTEBRCompare, FP32>; + defm : CompareZeroFP<LTDBRCompare, FP64>; + defm : CompareZeroFP<LTXBRCompare, FP128>; +} // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; @@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; + // For z13 we prefer LDE over LE to avoid partial register dependencies. + def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; + // These instructions are split after register allocation, so we don't // want a custom inserter. 
let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { @@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>, Requires<[FeatureFPExtension]>; def : Pat<(f32 (fround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; def : Pat<(f64 (fround FP128:$src)), (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; @@ -345,13 +353,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>; def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - FP32:$src1, subreg_h32), FP32:$src2)>; + FP32:$src1, subreg_r32), FP32:$src2)>; // f64 multiplication of an FP32 register and an f32 memory. def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (extloadf32 bdxaddr12only:$addr))), - (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32), bdxaddr12only:$addr)>; // f128 multiplication of two FP64 registers. 
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 9f59a1c8e7e3c..71eb9986499b6 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping { // Formats are specified using operand field declarations of the form: // // bits<4> Rn : register input or output for operand n +// bits<5> Vn : vector register input or output for operand n // bits<m> In : immediate value of width m for operand n // bits<4> BDn : address operand n, which has a base and a displacement // bits<m> XBDn : address operand n, which has an index, a base and a // displacement +// bits<m> VBDn : address operand n, which has a vector index, a base and a +// displacement // bits<4> Xn : index register for address operand n // bits<4> Mn : mode value for operand n // @@ -339,11 +342,13 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> bits<4> R1; bits<20> XBD2; + bits<4> M3; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-16} = XBD2; - let Inst{15-8} = 0; + let Inst{15-12} = M3; + let Inst{11-8} = 0; let Inst{7-0} = op{7-0}; let HasIndex = 1; @@ -473,6 +478,393 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{15-0} = BD2; } +class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<16> BD2; + + let Inst{31-16} = op; + let Inst{15-0} = BD2; +} + +class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> I2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-16} = I2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let 
Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<8> I2; + bits<8> I3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = 0; + let Inst{31-24} = I2; + let Inst{23-16} = I3; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V3; + bits<16> I2; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = I2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<8> I4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-16} = I4; + let Inst{15-12} = M5; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<12> I3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-20} = I3; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + 
let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M4 value provided as explicit operand. These are passed as m4or. +class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m4or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<4> M3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-24} = 0; + let Inst{23-20} = M5; + let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3}); + let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2}); + let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1}); + let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0}); + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M5 value provided as explicit operand. These are passed as m5or. 
+class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m5or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3}); + let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2}); + let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1}); + let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0}); + let Inst{19-16} = 0; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<4> M4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = 0; + let Inst{23-20} = M6; + let Inst{19-16} = M5; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = 0; + let Inst{7-0} = op{7-0}; +} + +// Depending on the instruction mnemonic, certain bits may be or-ed into +// the M6 value provided as explicit operand. These are passed as m6or. 
+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern, + bits<4> m6or = 0> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M5; + let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3}); + let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2}); + let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1}); + let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0}); + let Inst{19-16} = 0; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<5> V2; + bits<5> V3; + bits<5> V4; + bits<4> M5; + bits<4> M6; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V2{3-0}; + let Inst{31-28} = V3{3-0}; + let Inst{27-24} = M6; + let Inst{23-20} = 0; + let Inst{19-16} = M5; + let Inst{15-12} = V4{3-0}; + let Inst{11} = V1{4}; + let Inst{10} = V2{4}; + let Inst{9} = V3{4}; + let Inst{8} = V4{4}; + let Inst{7-0} = op{7-0}; +} + +class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<4> R2; + bits<4> R3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R2; + let Inst{31-28} = R3; + let Inst{27-12} = 0; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> 
{ + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<16> BD2; + bits<4> R3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-32} = R3; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<16> BD2; + bits<5> V3; + bits<4> M4; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = V3{3-0}; + let Inst{31-16} = BD2; + let Inst{15-12} = M4; + let Inst{11} = 0; + let Inst{10} = V3{4}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<21> VBD2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-16} = VBD2{19-0}; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10} = VBD2{20}; + let Inst{9-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<5> V1; + bits<20> XBD2; + bits<4> M3; + + let Inst{47-40} = 
op{15-8}; + let Inst{39-36} = V1{3-0}; + let Inst{35-16} = XBD2; + let Inst{15-12} = M3; + let Inst{11} = V1{4}; + let Inst{10-8} = 0; + let Inst{7-0} = op{7-0}; +} + //===----------------------------------------------------------------------===// // Instruction definitions with semantics //===----------------------------------------------------------------------===// @@ -492,12 +884,6 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> // form of the source register in the destination register and // branches on the result. // -// Store: -// One register or immediate input operand and one address input operand. -// The instruction stores the first operand to the address. -// -// This category is used for both pure and truncating stores. -// // LoadMultiple: // One address input operand and two explicit output operands. // The instruction loads a range of registers from the address, @@ -510,18 +896,35 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> // with the explicit operands giving the first and last register // to store. Other stored registers are added as implicit uses. // +// StoreLength: +// One value operand, one length operand and one address operand. +// The instruction stores the value operand to the address but +// doesn't write more than the number of bytes specified by the +// length operand. +// // Unary: // One register output operand and one input operand. // +// Store: +// One address operand and one other input operand. The instruction +// stores to the address. +// // Binary: // One register output operand and two input operands. // +// StoreBinary: +// One address operand and two other input operands. The instruction +// stores to the address. +// // Compare: // Two input operands and an implicit CC output operand. // // Ternary: // One register output operand and three input operands. // +// Quaternary: +// One register output operand and four input operands. 
+// // LoadAndOp: // One output operand and two input operands, one of which is an address. // The instruction both reads from and writes to the address. @@ -556,6 +959,12 @@ class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, let R2 = 0; } +class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value> + : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> { + let I2 = value; + let M3 = 0; +} + class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls> : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2), mnemonic##"\t$R1, $I2", []> { @@ -571,6 +980,13 @@ class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> let mayLoad = 1; } +class LoadMultipleVRSa<string mnemonic, bits<16> opcode> + : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", []> { + let M4 = 0; + let mayLoad = 1; +} + class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls> : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2), @@ -619,12 +1035,39 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, } } +class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes, bits<4> type = 0> + : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2), + mnemonic#"\t$V1, $XBD2", + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> { + let M3 = type; + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreLengthVRSb<string mnemonic, bits<16> opcode, + SDPatternOperator operator, bits<5> bytes> + : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> { + let M4 = 0; + let mayStore = 1; + let AccessBytes = bytes; +} + class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstRSY<opcode, (outs), (ins cls:$R1, 
cls:$R3, bdaddr20only:$BD2), mnemonic#"\t$R1, $R3, $BD2", []> { let mayStore = 1; } +class StoreMultipleVRSa<string mnemonic, bits<16> opcode> + : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", []> { + let M4 = 0; + let mayStore = 1; +} + // StoreSI* instructions are used to store an integer to memory, but the // addresses are more restricted than for normal stores. If we are in the // situation of having to force either the address into a register or the @@ -857,6 +1300,7 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let OpType = "mem"; let mayLoad = 1; let AccessBytes = bytes; + let M3 = 0; } class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, @@ -883,6 +1327,46 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, } } +class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, Immediate imm, bits<4> type = 0> + : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2), + mnemonic#"\t$V1, $I2", + [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> { + let M3 = type; +} + +class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, + bits<4> m5 = 0> + : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2), + mnemonic#"\t$V1, $V2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> { + let M3 = type; + let M4 = m4; + let M5 = m5; +} + +multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> modifier = 0, + bits<4> modifier_cc = 1> { + def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>; + let Defs = [CC] in + def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0, + modifier_cc>; +} + +class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator 
operator, + TypedReg tr, bits<5> bytes, bits<4> type = 0> + : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2), + mnemonic#"\t$V1, $XBD2", + [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> { + let M3 = type; + let mayLoad = 1; + let AccessBytes = bytes; +} + class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), @@ -1036,6 +1520,7 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; + let M3 = 0; } class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, @@ -1094,6 +1579,148 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, } } +class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3), + mnemonic#"\t$V1, $I2, $I3", + [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> { + let M4 = type; +} + +class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2), + mnemonic#"\t$V1, $V3, $I2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3), + imm32zx16:$I2)))]> { + let M4 = type; +} + +class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5> + : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3), + mnemonic#"\t$V1, $V2, $I3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + imm32zx12:$I3)))]> { + let M4 = type; + let M5 = m5; +} + +class BinaryVRRa<string mnemonic, bits<16> opcode> + : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3), + mnemonic#"\t$V1, $V2, $M3", []> { + let M4 = 0; + let M5 = 0; 
+} + +class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, + bits<4> modifier = 0> + : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3))))]> { + let M4 = type; + let M5 = modifier; +} + +// Declare a pair of instructions, one which sets CC and one which doesn't. +// The CC-setting form ends with "S" and sets the low bit of M5. +multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, + bits<4> modifier = 0, bits<4> modifier_cc = 1> { + def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>; + let Defs = [CC] in + def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + modifier_cc>; +} + +class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, + bits<4> m6 = 0> + : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3), + mnemonic#"\t$V1, $V2, $V3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3))))]> { + let M4 = type; + let M5 = m5; + let M6 = m6; +} + +multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m5, + bits<4> modifier = 0, bits<4> modifier_cc = 1> { + def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>; + let Defs = [CC] in + def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + m5, modifier_cc>; +} + +class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr> + : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3), + mnemonic#"\t$V1, $R2, $R3", + [(set tr.op:$V1, (tr.vt 
(operator GR64:$R2, GR64:$R3)))]>; + +class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2), + mnemonic#"\t$V1, $V3, $BD2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3), + shift12only:$BD2)))]> { + let M4 = type; +} + +class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + bits<5> bytes> + : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> { + let M4 = 0; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2), + mnemonic#"\t$R1, $V3, $BD2", + [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> { + let M4 = type; +} + +class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<5> bytes> + : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2, + imm32zx4:$M3)))]> { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + Immediate index> + : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreBinaryVRX<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr, bits<5> bytes, + Immediate index> + : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> { + let mayStore = 1; + let AccessBytes = bytes; +} + class 
CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), @@ -1166,6 +1793,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let isCompare = 1; let mayLoad = 1; let AccessBytes = bytes; + let M3 = 0; } class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, @@ -1235,6 +1863,17 @@ multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, } } +class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr, bits<4> type> + : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2), + mnemonic#"\t$V1, $V2", + [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> { + let isCompare = 1; + let M3 = type; + let M4 = 0; + let M5 = 0; +} + class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls> : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2), @@ -1261,6 +1900,188 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, let AccessBytes = bytes; } +class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> + : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3), + mnemonic#"\t$V1, $I2, $M3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + imm:$I2, index:$M3)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; +} + +class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8:$I4)))]> { + let M5 = type; +} + +class TernaryVRRa<string mnemonic, bits<16> opcode, 
SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> + : InstVRRa<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), + mnemonic#"\t$V1, $V2, $M4, $M5", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + imm32zx4:$M4, + imm32zx4:$M5)))], + m4or> { + let M3 = type; +} + +class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type, + SDPatternOperator m5mask, bits<4> m5or> + : InstVRRb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5), + mnemonic#"\t$V1, $V2, $V3, $M5", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + m5mask:$M5)))], + m5or> { + let M4 = type; +} + +multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m5or> { + def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, + imm32zx4even, !and (m5or, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; + let Defs = [CC] in + def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + imm32zx4even, !add(!and (m5or, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, 0)>; +} + +class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2> + : InstVRRc<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4), + mnemonic#"\t$V1, $V2, $V3, $M4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx4:$M4)))]> { + let M5 = 0; + let M6 = 0; +} + +class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type = 0> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, 
tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4))))]> { + let M5 = type; + let M6 = 0; +} + +class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> + : InstVRRe<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), + mnemonic#"\t$V1, $V2, $V3, $V4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4))))]> { + let M5 = m5; + let M6 = type; +} + +class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type> + : InstVRSb<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2), + mnemonic#"\t$V1, $R3, $BD2", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + cls:$R3, + shift12only:$BD2)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M4 = type; +} + +class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes, + Immediate index> + : InstVRV<opcode, (outs VR128:$V1), + (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3), + mnemonic#"\t$V1, $VBD2, $M3", []> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> + : InstVRX<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3), + mnemonic#"\t$V1, $XBD2, $M3", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + bdxaddr12only:$XBD2, + index:$M3)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let mayLoad = 1; + let AccessBytes = bytes; +} + +class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator, + TypedReg tr1, 
TypedReg tr2, bits<4> type> + : InstVRId<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4), + mnemonic#"\t$V1, $V2, $V3, $I4", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src), + (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + imm32zx8:$I4)))]> { + let Constraints = "$V1 = $V1src"; + let DisableEncoding = "$V1src"; + let M5 = type; +} + +class QuaternaryVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, TypedReg tr1, TypedReg tr2, + bits<4> type, SDPatternOperator m6mask, bits<4> m6or> + : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2), + (tr2.vt tr2.op:$V3), + (tr2.vt tr2.op:$V4), + m6mask:$M6)))], + m6or> { + let M5 = type; +} + +multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + SDPatternOperator operator_cc, TypedReg tr1, + TypedReg tr2, bits<4> type, bits<4> m6or> { + def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type, + imm32zx4even, !and (m6or, 14)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; + let Defs = [CC] in + def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, + imm32zx4even, !add (!and (m6or, 14), 1)>; + def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr2.op:$V4, 0)>; +} + class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr20only> : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2), @@ -1330,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode, // A floating-point load-and test operation. Create both a normal unary // operation and one that acts as a comparison against zero. 
+// Note that the comparison against zero operation is not available if we +// have vector support, since load-and-test instructions will partially +// clobber the target (vector) register. multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> { def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>; - let isCodeGenOnly = 1 in + let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>; } @@ -1577,6 +2401,26 @@ class Alias<int size, dag outs, dag ins, list<dag> pattern> let isCodeGenOnly = 1; } +class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2> + : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>; + +// An alias of a UnaryVRR*, but with different register sizes. +class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2> + : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2), + [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>; + +// An alias of a UnaryVRX, but with different register sizes. +class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2), + [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>; + +// An alias of a StoreVRX, but with different register sizes. +class StoreAliasVRX<SDPatternOperator operator, TypedReg tr, + AddressingMode mode = bdxaddr12only> + : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2), + [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>; + // An alias of a BinaryRI, but with different register sizes. class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls, Immediate imm> @@ -1593,6 +2437,10 @@ class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls, let Constraints = "$R1 = $R1src"; } +// An alias of a BinaryVRRf, but with different register sizes. 
+class BinaryAliasVRRf<RegisterOperand cls> + : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>; + // An alias of a CompareRI, but with different register sizes. class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, Immediate imm> diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 8ff9553ca0817..90598852b5ed0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -423,7 +423,7 @@ static MachineInstr *getDef(unsigned Reg, } // Return true if MI is a shift of type Opcode by Imm bits. -static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) { +static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) { return (MI->getOpcode() == Opcode && !MI->getOperand(2).getReg() && MI->getOperand(3).getImm() == Imm); @@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = SystemZ::LDR; else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LXR; + else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR32; + else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR64; + else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::VLR; else llvm_unreachable("Impossible reg-to-reg copy"); @@ -633,7 +639,7 @@ struct LogicOp { LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize) : RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {} - LLVM_EXPLICIT operator bool() const { return RegSize; } + explicit operator bool() const { return RegSize; } unsigned RegSize, ImmLSB, ImmSize; }; @@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Start, End; if (isRxSBGMask(Imm, And.RegSize, Start, End)) { unsigned NewOpcode; - if (And.RegSize == 64) + if (And.RegSize == 64) { NewOpcode = SystemZ::RISBG; - else { + // Prefer RISBGN if available, since it does not clobber CC. 
+ if (STI.hasMiscellaneousExtensions()) + NewOpcode = SystemZ::RISBGN; + } else { NewOpcode = SystemZ::RISBMux; Start &= 31; End &= 31; @@ -743,11 +752,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; } -MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { +MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Opcode = MI->getOpcode(); @@ -862,9 +870,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } MachineInstr * -SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { return nullptr; } @@ -1114,6 +1122,16 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, } else if (RC == &SystemZ::FP128BitRegClass) { LoadOpcode = SystemZ::LX; StoreOpcode = SystemZ::STX; + } else if (RC == &SystemZ::VR32BitRegClass) { + LoadOpcode = SystemZ::VL32; + StoreOpcode = SystemZ::VST32; + } else if (RC == &SystemZ::VR64BitRegClass) { + LoadOpcode = SystemZ::VL64; + StoreOpcode = SystemZ::VST64; + } else if (RC == &SystemZ::VF128BitRegClass || + RC == &SystemZ::VR128BitRegClass) { + LoadOpcode = SystemZ::VL; + StoreOpcode = SystemZ::VST; } else llvm_unreachable("Unsupported regclass to load or store"); } @@ -1147,17 +1165,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { switch (Opcode) { - case SystemZ::L: return SystemZ::LT; - case SystemZ::LY: return SystemZ::LT; - case SystemZ::LG: 
return SystemZ::LTG; - case SystemZ::LGF: return SystemZ::LTGF; - case SystemZ::LR: return SystemZ::LTR; - case SystemZ::LGFR: return SystemZ::LTGFR; - case SystemZ::LGR: return SystemZ::LTGR; - case SystemZ::LER: return SystemZ::LTEBR; - case SystemZ::LDR: return SystemZ::LTDBR; - case SystemZ::LXR: return SystemZ::LTXBR; - default: return 0; + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + case SystemZ::LER: return SystemZ::LTEBR; + case SystemZ::LDR: return SystemZ::LTDBR; + case SystemZ::LXR: return SystemZ::LTXBR; + // On zEC12 we prefer to use RISBGN. But if there is a chance to + // actually use the condition code, we may turn it back into RISBG. + // Note that RISBG is not really a "load-and-test" instruction, + // but sets the same condition code values, so is OK to use here. + case SystemZ::RISBGN: return SystemZ::RISBG; + default: return 0; } } @@ -1178,6 +1201,7 @@ static bool isStringOfOnes(uint64_t Mask, unsigned &LSB, unsigned &Length) { bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const { // Reject trivial all-zero masks. + Mask &= allOnes(BitSize); if (Mask == 0) return false; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index d2e3f541f80ee..b55810b253f15 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCMask(unsigned int Flags) { // SystemZ MachineOperand target flags. enum { // Masks out the bits for the access model. 
- MO_SYMBOL_MODIFIER = (1 << 0), + MO_SYMBOL_MODIFIER = (3 << 0), // @GOT (aka @GOTENT) - MO_GOT = (1 << 0) + MO_GOT = (1 << 0), + + // @INDNTPOFF + MO_INDNTPOFF = (2 << 0) }; // Classifies a branch. enum BranchType { @@ -183,11 +186,11 @@ public: MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, + ArrayRef<unsigned> Ops, int FrameIndex) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const override; bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 902d74de506bf..820f30bc173d2 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; } +// TLS calls. These will be lowered into a call to __tls_get_offset, +// with an extra relocation specifying the TLS symbol. +let isCall = 1, Defs = [R14D, CC] in { + def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_gdcall tglobaltlsaddr:$I2)]>; + def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_ldcall tglobaltlsaddr:$I2)]>; +} + // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. -def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), +// Allow an optional TLS marker symbol to generate TLS call relocations. 
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), "bras\t$R1, $I2", []>; -def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), +def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), "brasl\t$R1, $I2", []>; def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), "basr\t$R1, $R2", []>; @@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, [(set GR64:$R1, pcrel32:$I2)]>; } +// Load the Global Offset Table address. This will be lowered into a +// larl $R1, _GLOBAL_OFFSET_TABLE_ +// instruction. +def GOT : Alias<6, (outs GR64:$R1), (ins), + [(set GR64:$R1, (global_offset_table))]>; + //===----------------------------------------------------------------------===// // Absolute and Negation //===----------------------------------------------------------------------===// @@ -1045,6 +1061,10 @@ let Defs = [CC] in { def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; } +// On zEC12 we have a variant of RISBG that does not set CC. +let Predicates = [FeatureMiscellaneousExtensions] in + def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; + // Forms of RISBG that only affect one word of the destination register. // They do not set CC. 
let Predicates = [FeatureHighWord] in { @@ -1342,6 +1362,60 @@ let Defs = [CC] in { } //===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let hasSideEffects = 1, mayStore = 1, + usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : InstSIL<0xE560, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbegin\t$BD1, $I2", + [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; + def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + [(z_tbegin_nofloat bdaddr12only:$BD1, + imm32zx16:$I2)]>; + def TBEGINC : InstSIL<0xE561, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbeginc\t$BD1, $I2", + [(int_s390_tbeginc bdaddr12only:$BD1, + imm32zx16:$I2)]>; + } + + // Transaction End + let hasSideEffects = 1, Defs = [CC], BD2 = 0 in + def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; + + // Transaction Abort + let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in + def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), + "tabort\t$BD2", + [(int_s390_tabort bdaddr12only:$BD2)]>; + + // Nontransactional Store + let hasSideEffects = 1 in + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + let hasSideEffects = 1 in + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1, R4 = 0 in + def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), + "ppa\t$R1, $R2, $R3", []>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), 
+ 0, 1)>; +} + +//===----------------------------------------------------------------------===// // Miscellaneous Instructions. //===----------------------------------------------------------------------===// @@ -1366,6 +1440,13 @@ let Defs = [CC] in { def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; +// Population count. Counts bits set per byte. +let Predicates = [FeaturePopulationCount], Defs = [CC] in { + def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), + "popcnt\t$R1, $R2", + [(set GR64:$R1, (z_popcnt GR64:$R2))]>; +} + // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td new file mode 100644 index 0000000000000..c101e43ada3a4 --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -0,0 +1,1097 @@ +//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Register move. + def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; + def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>; + def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>; + + // Load GR from VR element. 
+ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>; + def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>; + def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>; + def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>; + + // Load VR element from GR. + def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert, + v128b, v128b, GR32, 0>; + def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert, + v128h, v128h, GR32, 1>; + def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert, + v128f, v128f, GR32, 2>; + def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert, + v128g, v128g, GR64, 3>; + + // Load VR from GRs disjoint. + def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>; + def VLVGP32 : BinaryAliasVRRf<GR32>; +} + +// Extractions always assign to the full GR64, even if the element would +// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a +// subreg of the result. +class VectorExtractSubreg<ValueType type, Instruction insn> + : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)), + (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>; + +def : VectorExtractSubreg<v16i8, VLGVB>; +def : VectorExtractSubreg<v8i16, VLGVH>; +def : VectorExtractSubreg<v4i32, VLGVF>; + +//===----------------------------------------------------------------------===// +// Immediate instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Generate byte mask. + def VZERO : InherentVRIa<"vzero", 0xE744, 0>; + def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + + // Generate mask. 
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; + def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; + def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; + def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; + + // Load element immediate. + // + // We want these instructions to be used ahead of VLVG* where possible. + // However, VLVG* takes a variable BD-format index whereas VLEI takes + // a plain immediate index. This means that VLVG* has an extra "base" + // register operand and is 3 units more complex. Bumping the complexity + // of the VLEI* instructions by 4 means that they are strictly better + // than VLVG* in cases where both forms match. + let AddedComplexity = 4 in { + def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, + v128b, v128b, imm32sx16trunc, imm32zx4>; + def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, + v128h, v128h, imm32sx16trunc, imm32zx3>; + def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, + v128f, v128f, imm32sx16, imm32zx2>; + def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, + v128g, v128g, imm64sx16, imm32zx1>; + } + + // Replicate immediate. + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; +} + +//===----------------------------------------------------------------------===// +// Loads +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Load. + def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; + + // Load to block boundary. The number of loaded bytes is only known + // at run time. The instruction is really polymorphic, but v128b matches + // the return type of the associated intrinsic. 
+ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; + + // Load count to block boundary. + let Defs = [CC] in + def LCBB : InstRXE<0xE727, (outs GR32:$R1), + (ins bdxaddr12only:$XBD2, imm32zx4:$M3), + "lcbb\t$R1, $XBD2, $M3", + [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, + imm32zx4:$M3))]>; + + // Load with length. The number of loaded bytes is only known at run time. + def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; + + // Load multiple. + def VLM : LoadMultipleVRSa<"vlm", 0xE736>; + + // Load and replicate + def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>; + def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; + def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; + def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; + def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)), + (VLREPF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), + (VLREPG bdxaddr12only:$addr)>; + + // Use VLREP to load subvectors. These patterns use "12pair" because + // LEY and LDY offer full 20-bit displacement fields. It's often better + // to use those instructions rather than force a 20-bit displacement + // into a GPR temporary. + def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>; + def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>; + + // Load logical element and zero. + def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>; + def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>; + def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>; + def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>; + def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)), + (VLLEZF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), + (VLLEZG bdxaddr12only:$addr)>; + + // Load element. 
+ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; + def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>; + def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>; + def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>; + def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index), + (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; + def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index), + (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; + + // Gather element. + def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>; + def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>; +} + +// Use replicating loads if we're inserting a single element into an +// undefined vector. This avoids a false dependency on the previous +// register contents. +multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype, + SDPatternOperator load, ValueType scalartype> { + def : Pat<(vectype (z_vector_insert + (undef), (scalartype (load bdxaddr12only:$addr)), 0)), + (vlrep bdxaddr12only:$addr)>; + def : Pat<(vectype (scalar_to_vector + (scalartype (load bdxaddr12only:$addr)))), + (vlrep bdxaddr12only:$addr)>; +} +defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>; +defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>; +defm : ReplicatePeephole<VLREPF, v4i32, load, i32>; +defm : ReplicatePeephole<VLREPG, v2i64, load, i64>; +defm : ReplicatePeephole<VLREPF, v4f32, load, f32>; +defm : ReplicatePeephole<VLREPG, v2f64, load, f64>; + +//===----------------------------------------------------------------------===// +// Stores +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Store. + def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; + + // Store with length. The number of stored bytes is only known at run time. 
+ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; + + // Store multiple. + def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; + + // Store element. + def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>; + def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>; + def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>; + def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>; + def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr, + imm32zx2:$index), + (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; + def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr, + imm32zx1:$index), + (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; + + // Use VSTE to store subvectors. These patterns use "12pair" because + // STEY and STDY offer full 20-bit displacement fields. It's often better + // to use those instructions rather than force a 20-bit displacement + // into a GPR temporary. + def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>; + def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>; + + // Scatter element. + def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; + def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>; +} + +//===----------------------------------------------------------------------===// +// Selects and permutes +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Merge high. + def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>; + def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; + def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; + def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; + def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>; + def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>; + + // Merge low. 
+ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; + def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; + def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; + def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; + def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>; + def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>; + + // Permute. + def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; + + // Permute doubleword immediate. + def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; + + // Replicate. + def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; + def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; + def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; + def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), + (VREPF VR128:$vec, imm32zx16:$index)>; + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + (VREPG VR128:$vec, imm32zx16:$index)>; + + // Select. + def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; +} + +//===----------------------------------------------------------------------===// +// Widening and narrowing +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Pack + def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>; + def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>; + def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; + + // Pack saturate. + defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, + v128b, v128h, 1>; + defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, + v128h, v128f, 2>; + defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, + v128f, v128g, 3>; + + // Pack saturate logical. 
+ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, + v128b, v128h, 1>; + defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, + v128h, v128f, 2>; + defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, + v128f, v128g, 3>; + + // Sign-extend to doubleword. + def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>; + def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>; + def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>; + def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>; + def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>; + def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>; + + // Unpack high. + def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>; + def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>; + def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>; + + // Unpack logical high. + def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>; + def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>; + def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>; + + // Unpack low. + def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>; + def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>; + def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>; + + // Unpack logical low. + def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>; + def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>; + def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>; +} + +//===----------------------------------------------------------------------===// +// Instantiating generic operations for specific types. 
+//===----------------------------------------------------------------------===// + +multiclass GenericVectorOps<ValueType type, ValueType inttype> { + let Predicates = [FeatureVector] in { + def : Pat<(type (load bdxaddr12only:$addr)), + (VL bdxaddr12only:$addr)>; + def : Pat<(store (type VR128:$src), bdxaddr12only:$addr), + (VST VR128:$src, bdxaddr12only:$addr)>; + def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)), + (VSEL VR128:$y, VR128:$z, VR128:$x)>; + def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)), + (VSEL VR128:$z, VR128:$y, VR128:$x)>; + } +} + +defm : GenericVectorOps<v16i8, v16i8>; +defm : GenericVectorOps<v8i16, v8i16>; +defm : GenericVectorOps<v4i32, v4i32>; +defm : GenericVectorOps<v2i64, v2i64>; +defm : GenericVectorOps<v4f32, v4i32>; +defm : GenericVectorOps<v2f64, v2i64>; + +//===----------------------------------------------------------------------===// +// Integer arithmetic +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Add. + def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; + def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; + def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; + def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; + def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; + + // Add compute carry. + def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; + def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; + def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; + def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; + def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; + + // Add with carry. + def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; + + // Add with carry compute carry. 
+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; + + // And. + def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; + + // And with complement. + def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; + + // Average. + def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; + def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; + def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; + def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; + + // Average logical. + def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; + def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; + def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; + def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; + + // Checksum. + def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; + + // Count leading zeros. + def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; + def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>; + def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>; + def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>; + + // Count trailing zeros. + def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>; + def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>; + def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; + def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; + + // Exclusive or. + def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; + + // Galois field multiply sum. 
+ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; + def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; + def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; + def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; + + // Galois field multiply sum and accumulate. + def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; + def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; + def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; + def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; + + // Load complement. + def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; + def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>; + def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>; + def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>; + + // Load positive. + def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>; + def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>; + def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>; + def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; + + // Maximum. + def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; + def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; + def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; + def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + + // Maximum logical. + def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; + def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; + def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; + def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + + // Minimum. 
+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; + def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; + def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; + def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + + // Minimum logical. + def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; + def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; + def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; + def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; + + // Multiply and add low. + def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; + def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; + def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; + + // Multiply and add high. + def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; + def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; + def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; + + // Multiply and add logical high. + def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; + def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; + def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; + + // Multiply and add even. + def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; + def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; + def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; + + // Multiply and add logical even. + def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; + def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; + def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; + + // Multiply and add odd. 
+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; + def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; + def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; + + // Multiply and add logical odd. + def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; + def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; + def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; + + // Multiply high. + def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; + def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; + def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; + + // Multiply logical high. + def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; + def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; + def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; + + // Multiply low. + def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; + def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; + def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; + + // Multiply even. + def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; + def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; + def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; + + // Multiply logical even. + def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; + def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; + def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; + + // Multiply odd. + def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; + def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; + def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; + + // Multiply logical odd. 
+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; + def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; + def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; + + // Nor. + def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; + + // Or. + def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; + + // Population count. + def VPOPCT : BinaryVRRa<"vpopct", 0xE750>; + def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; + + // Element rotate left logical (with vector shift amount). + def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb, + v128b, v128b, 0>; + def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh, + v128h, v128h, 1>; + def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf, + v128f, v128f, 2>; + def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg, + v128g, v128g, 3>; + + // Element rotate left logical (with scalar shift amount). + def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>; + def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>; + def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>; + def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>; + + // Element rotate and insert under mask. + def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; + def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; + def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; + def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; + + // Element shift left (with vector shift amount). 
+ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; + def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>; + def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>; + def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>; + + // Element shift left (with scalar shift amount). + def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>; + def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>; + def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>; + def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>; + + // Element shift right arithmetic (with vector shift amount). + def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>; + def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>; + def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>; + def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>; + + // Element shift right arithmetic (with scalar shift amount). + def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>; + def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>; + def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>; + def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>; + + // Element shift right logical (with vector shift amount). + def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>; + def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>; + def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>; + def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>; + + // Element shift right logical (with scalar shift amount). 
+ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>; + def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>; + def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>; + def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; + + // Shift left. + def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; + + // Shift left by byte. + def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; + + // Shift left double by byte. + def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; + + // Shift right arithmetic. + def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; + + // Shift right arithmetic by byte. + def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; + + // Shift right logical. + def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; + + // Shift right logical by byte. + def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; + + // Subtract. + def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; + def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; + def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; + def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; + def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>; + + // Subtract compute borrow indication. + def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>; + def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>; + def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>; + def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>; + def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>; + + // Subtract with borrow indication. 
+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>; + + // Subtract with borrow compute borrow indication. + def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq, + v128q, v128q, 4>; + + // Sum across doubleword. + def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; + def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>; + + // Sum across quadword. + def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>; + def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>; + + // Sum across word. + def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>; + def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>; +} + +// Instantiate the bitwise ops for type TYPE. +multiclass BitwiseVectorOps<ValueType type> { + let Predicates = [FeatureVector] in { + def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>; + def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))), + (VNC VR128:$x, VR128:$y)>; + def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>; + def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>; + def : Pat<(type (or (and VR128:$x, VR128:$z), + (and VR128:$y, (z_vnot VR128:$z)))), + (VSEL VR128:$x, VR128:$y, VR128:$z)>; + def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))), + (VNO VR128:$x, VR128:$y)>; + def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>; + } +} + +defm : BitwiseVectorOps<v16i8>; +defm : BitwiseVectorOps<v8i16>; +defm : BitwiseVectorOps<v4i32>; +defm : BitwiseVectorOps<v2i64>; + +// Instantiate additional patterns for absolute-related expressions on +// type TYPE. LC is the negate instruction for TYPE and LP is the absolute +// instruction. 
+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc, + Instruction lp, int shift> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)), + VR128:$x, (z_vneg VR128:$x))), + (lc (lp VR128:$x))>; + def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))), + (z_vneg VR128:$x), VR128:$x)), + (lc (lp VR128:$x))>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + (z_vneg VR128:$x)), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + VR128:$x))), + (lp VR128:$x)>; + def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), + VR128:$x), + (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), + (z_vneg VR128:$x)))), + (lc (lp VR128:$x))>; + } +} + +defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>; +defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>; +defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>; +defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>; + +// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the +// signed or unsigned "set if greater than" comparison instruction and +// MIN and MAX are the associated minimum and maximum instructions. 
+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph, + Instruction min, Instruction max> { + let Predicates = [FeatureVector] in { + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), + (max VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$x, VR128:$y)), + (min VR128:$x, VR128:$y)>; + def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), + VR128:$y, VR128:$x)), + (max VR128:$x, VR128:$y)>; + } +} + +// Signed min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>; + +// Unsigned min/max. +defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>; +defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>; +defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>; +defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>; + +//===----------------------------------------------------------------------===// +// Integer comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Element compare. + let Defs = [CC] in { + def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>; + def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>; + def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>; + def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>; + } + + // Element compare logical. 
+ let Defs = [CC] in { + def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>; + def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>; + def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>; + def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>; + } + + // Compare equal. + defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, + v128b, v128b, 0>; + defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, + v128h, v128h, 1>; + defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, + v128f, v128f, 2>; + defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, + v128g, v128g, 3>; + + // Compare high. + defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, + v128b, v128b, 0>; + defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, + v128h, v128h, 1>; + defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, + v128f, v128f, 2>; + defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, + v128g, v128g, 3>; + + // Compare high logical. + defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, + v128b, v128b, 0>; + defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, + v128h, v128h, 1>; + defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, + v128f, v128f, 2>; + defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, + v128g, v128g, 3>; + + // Test under mask. + let Defs = [CC] in + def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; +} + +//===----------------------------------------------------------------------===// +// Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// See comments in SystemZInstrFP.td for the suppression flags and +// rounding modes. 
+multiclass VectorRounding<Instruction insn, TypedReg tr> { + def : FPConversion<insn, frint, tr, tr, 0, 0>; + def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>; + def : FPConversion<insn, ffloor, tr, tr, 4, 7>; + def : FPConversion<insn, fceil, tr, tr, 4, 6>; + def : FPConversion<insn, ftrunc, tr, tr, 4, 5>; + def : FPConversion<insn, frnd, tr, tr, 4, 1>; +} + +let Predicates = [FeatureVector] in { + // Add. + def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; + def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; + + // Convert from fixed 64-bit. + def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; + def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; + def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>; + + // Convert from logical 64-bit. + def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; + def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; + def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>; + + // Convert to fixed 64-bit. + def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; + def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>; + + // Convert to logical 64-bit. + def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; + def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>; + + // Divide. + def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; + def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; + + // Load FP integer. 
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; + def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; + defm : VectorRounding<VFIDB, v128db>; + defm : VectorRounding<WFIDB, v64db>; + + // Load lengthened. + def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; + def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>; + + // Load rounded. + def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; + def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; + def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>; + + // Multiply. + def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; + def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; + + // Multiply and add. + def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; + def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; + + // Multiply and subtract. + def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; + def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; + + // Load complement. + def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; + def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; + + // Load negative. + def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; + def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; + + // Load positive. + def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; + def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; + + // Square root. + def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; + def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; + + // Subtract.
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; + def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; + + // Test data class immediate. + let Defs = [CC] in { + def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; + def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; + } +} + +//===----------------------------------------------------------------------===// +// Floating-point comparison +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + // Compare scalar. + let Defs = [CC] in + def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; + + // Compare and signal scalar. + let Defs = [CC] in + def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; + + // Compare equal. + defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, + v128g, v128db, 3, 0>; + defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, + v64g, v64db, 3, 8>; + + // Compare high. + defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, + v128g, v128db, 3, 0>; + defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, + v64g, v64db, 3, 8>; + + // Compare high or equal. 
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, + v128g, v128db, 3, 0>; + defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, + v64g, v64db, 3, 8>; +} + +//===----------------------------------------------------------------------===// +// Conversions +//===----------------------------------------------------------------------===// + +def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; 
+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; + +//===----------------------------------------------------------------------===// +// Replicating scalars +//===----------------------------------------------------------------------===// + +// Define patterns for replicating a scalar GR32 into a vector of type TYPE. +// INDEX is 8 divided by the element size in bytes, minus 1. +class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index> + : Pat<(type (z_replicate GR32:$scalar)), + (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>; + +def : VectorReplicateScalar<v16i8, VREPB, 7>; +def : VectorReplicateScalar<v8i16, VREPH, 3>; +def : VectorReplicateScalar<v4i32, VREPF, 1>; + +// i64 replications are just a single instruction. +def : Pat<(v2i64 (z_replicate GR64:$scalar)), + (VLVGP GR64:$scalar, GR64:$scalar)>; + +//===----------------------------------------------------------------------===// +// Floating-point insertion and extraction +//===----------------------------------------------------------------------===// + +// Moving 32-bit values between GPRs and FPRs can be done using VLVGF +// and VLGVF. +def LEFR : UnaryAliasVRS<VR32, GR32>; +def LFER : UnaryAliasVRS<GR64, VR32>; +def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; +def : Pat<(i32 (bitconvert (f32 VR32:$src))), + (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; + +// Floating-point values are stored in element 0 of the corresponding +// vector register.
Scalar to vector conversion is just a subreg and +// scalar replication can just replicate element 0 of the vector register. +multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls, + SubRegIndex subreg> { + def : Pat<(vt (scalar_to_vector cls:$scalar)), + (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; + def : Pat<(vt (z_replicate cls:$scalar)), + (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, + subreg), 0)>; +} +defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>; +defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>; + +// Match v2f64 insertions. The AddedComplexity counters the 3 added by +// TableGen for the base register operand in VLVG-based integer insertions +// and ensures that this version is strictly better. +let AddedComplexity = 4 in { + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), + (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), VR128:$vec, 1)>; + def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), + (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, + subreg_r64), 0)>; +} + +// We extract floating-point element X by replicating (for elements other +// than 0) and then taking a high subreg. The AddedComplexity counters the +// 3 added by TableGen for the base register operand in VLGV-based integer +// extractions and ensures that this version is strictly better. 
+let AddedComplexity = 4 in { + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_r32)>; + def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)), + (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>; + + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), + (EXTRACT_SUBREG VR128:$vec, subreg_r64)>; + def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), + (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; +} + +//===----------------------------------------------------------------------===// +// String instructions +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVector] in { + defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc, + v128b, v128b, 0, 0>; + defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc, + v128h, v128h, 1, 0>; + defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc, + v128f, v128f, 2, 0>; + defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc, + v128b, v128b, 0, 2>; + defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc, + v128h, v128h, 1, 2>; + defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc, + v128f, v128f, 2, 2>; + + defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc, + v128b, v128b, 0, 0, 1>; + defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc, + v128h, v128h, 1, 0, 1>; + defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc, + v128f, v128f, 2, 0, 1>; + defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc, + v128b, v128b, 0, 2, 3>; + defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc, + v128h, v128h, 1, 2, 3>; + defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc, + v128f, v128f, 2, 2, 3>; + + defm 
VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc, + v128b, v128b, 0, 0, 1>; + defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc, + v128h, v128h, 1, 0, 1>; + defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc, + v128f, v128f, 2, 0, 1>; + defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, + z_vfenez_cc, v128b, v128b, 0, 2, 3>; + defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, + z_vfenez_cc, v128h, v128h, 1, 2, 3>; + defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, + z_vfenez_cc, v128f, v128f, 2, 2, 3>; + + defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc, + v128b, v128b, 0>; + defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc, + v128h, v128h, 1>; + defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc, + v128f, v128f, 2>; + + defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, + z_vstrc_cc, v128b, v128b, 0, 0>; + defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, + z_vstrc_cc, v128h, v128h, 1, 0>; + defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, + z_vstrc_cc, v128f, v128f, 2, 0>; + defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, + z_vstrcz_cc, v128b, v128b, 0, 2>; + defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, + z_vstrcz_cc, v128h, v128h, 1, 2>; + defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, + z_vstrcz_cc, v128f, v128f, 2, 2>; +} diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp new file mode 100644 index 0000000000000..24165be29ae77 --- /dev/null +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -0,0 +1,143 @@ +//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines multiple accesses to local-dynamic TLS variables so that +// the TLS base address for the module is only fetched once per execution path +// through the function. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "SystemZMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace { + +class SystemZLDCleanup : public MachineFunctionPass { +public: + static char ID; + SystemZLDCleanup(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {} + + const char *getPassName() const override { + return "SystemZ Local Dynamic TLS Access Clean-up"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg); + MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg); + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg); + + const SystemZInstrInfo *TII; + MachineFunction *MF; +}; + +char SystemZLDCleanup::ID = 0; + +} // end anonymous namespace + +FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) { + return new SystemZLDCleanup(TM); +} + +void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { + 
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + MF = &F; + + SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there aren't at least two. + return false; + } + + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + return VisitNode(DT->getRootNode(), 0); +} + +// Visit the dominator subtree rooted at Node in pre-order. +// If TLSBaseAddrReg is nonzero, then use that to replace any +// TLS_LDCALL instructions. Otherwise, create the register +// when the first such instruction is seen, and then use it +// as we encounter more instructions. +bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node, + unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { + switch (I->getOpcode()) { + case SystemZ::TLS_LDCALL: + if (TLSBaseAddrReg) + I = ReplaceTLSCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (auto I = Node->begin(), E = Node->end(); I != E; ++I) + Changed |= VisitNode(*I, TLSBaseAddrReg); + + return Changed; +} + +// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg, +// returning the new instruction. +MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + // Insert a Copy from TLSBaseAddrReg to R2. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), SystemZ::R2D) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_LDCALL instruction. + I->eraseFromParent(); + + return Copy; +} + +// Create a virtual register in *TLSBaseAddrReg, and populate it by +// inserting a copy instruction after I. Returns the new instruction.
+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, + unsigned *TLSBaseAddrReg) { + // Create a virtual register for the TLS base address. + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass); + + // Insert a copy from R2 to TLSBaseAddrReg. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) + .addReg(SystemZ::R2D); + + return Copy; +} + diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index df561e2d8002d..a1dcedab54e7d 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -11,6 +11,7 @@ #include "SystemZAsmPrinter.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -22,6 +23,8 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { return MCSymbolRefExpr::VK_None; case SystemZII::MO_GOT: return MCSymbolRefExpr::VK_GOT; + case SystemZII::MO_INDNTPOFF: + return MCSymbolRefExpr::VK_INDNTPOFF; } llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); } @@ -77,14 +80,14 @@ SystemZMCInstLower::getExpr(const MachineOperand &MO, MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { switch (MO.getType()) { case MachineOperand::MO_Register: - return MCOperand::CreateReg(MO.getReg()); + return MCOperand::createReg(MO.getReg()); case MachineOperand::MO_Immediate: - return MCOperand::CreateImm(MO.getImm()); + return MCOperand::createImm(MO.getImm()); default: { MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); - return MCOperand::CreateExpr(getExpr(MO, Kind)); + return MCOperand::createExpr(getExpr(MO, Kind)); } } } diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 
92c2ce7324a01..34fc36d6bf6c9 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsFrameIndex; unsigned RegSaveFrameIndex; bool ManipulatesSP; + unsigned NumLocalDynamics; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), + NumLocalDynamics(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -61,6 +63,10 @@ public: // e.g. through STACKSAVE or STACKRESTORE. bool getManipulatesSP() const { return ManipulatesSP; } void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } + + // Count number of local-dynamic TLS symbols used. + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 7be81dca727b1..9af90d492cf84 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -16,6 +16,11 @@ class ImmediateAsmOperand<string name> let Name = name; let RenderMethod = "addImmOperands"; } +class ImmediateTLSAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmTLSOperands"; +} // Constructs both a DAG pattern and instruction operand for an immediate // of type VT. 
PRED returns true if a node is acceptable and XFORM returns @@ -34,6 +39,11 @@ class PCRelAsmOperand<string size> : ImmediateAsmOperand<"PCRel"##size> { let PredicateMethod = "isImm"; let ParserMethod = "parsePCRel"##size; } +class PCRelTLSAsmOperand<string size> + : ImmediateTLSAsmOperand<"PCRelTLS"##size> { + let PredicateMethod = "isImmTLS"; + let ParserMethod = "parsePCRelTLS"##size; +} // Constructs an operand for a PC-relative address with address type VT. // ASMOP is the associated asm operand. @@ -41,6 +51,10 @@ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { let PrintMethod = "printPCRelOperand"; let ParserMatchClass = asmop; } +class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> { + let PrintMethod = "printPCRelTLSOperand"; + let ParserMatchClass = asmop; +} // Constructs both a DAG pattern and instruction operand for a PC-relative // address with address size VT. SELF is the name of the operand and @@ -64,6 +78,22 @@ class AddressAsmOperand<string format, string bitsize, string dispsize, let RenderMethod = "add"##format##"Operands"; } +// Constructs an instruction operand for an addressing mode. FORMAT, +// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated +// AddressAsmOperand. OPERANDS is a list of individual operands +// (base register, displacement, etc.). +class AddressOperand<string bitsize, string dispsize, string length, + string format, dag operands> + : Operand<!cast<ValueType>("i"##bitsize)> { + let PrintMethod = "print"##format##"Operand"; + let EncoderMethod = "get"##format##dispsize##length##"Encoding"; + let DecoderMethod = + "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; + let MIOperandInfo = operands; + let ParserMatchClass = + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); +} + // Constructs both a DAG pattern and instruction operand for an addressing mode. 
// FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated // AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands @@ -79,15 +109,7 @@ class AddressingMode<string seltype, string bitsize, string dispsize, : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, "select"##seltype##dispsize##suffix##length, [add, sub, or, frameindex, z_adjdynalloc]>, - Operand<!cast<ValueType>("i"##bitsize)> { - let PrintMethod = "print"##format##"Operand"; - let EncoderMethod = "get"##format##dispsize##length##"Encoding"; - let DecoderMethod = - "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; - let MIOperandInfo = operands; - let ParserMatchClass = - !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length); -} + AddressOperand<bitsize, dispsize, length, format, operands>; // An addressing mode with a base and displacement but no index. class BDMode<string type, string bitsize, string dispsize, string suffix> @@ -111,6 +133,13 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix, !cast<Immediate>("disp"##dispsize##"imm"##bitsize), !cast<Immediate>("imm"##bitsize))>; +// An addressing mode with a base, displacement and a vector index. +class BDVMode<string bitsize, string dispsize> + : AddressOperand<bitsize, dispsize, "", "BDVAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<RegisterOperand>("VR128"))>; + //===----------------------------------------------------------------------===// // Extracting immediate operands from nodes // These all create MVT::i64 nodes to ensure the value is not sign-extended @@ -120,82 +149,105 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix, // Bits 0-15 (counting from the lsb). 
def LL16 : SDNodeXForm<imm, [{ uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // Bits 16-31 (counting from the lsb). def LH16 : SDNodeXForm<imm, [{ uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // Bits 32-47 (counting from the lsb). def HL16 : SDNodeXForm<imm, [{ uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // Bits 48-63 (counting from the lsb). def HH16 : SDNodeXForm<imm, [{ uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // Low 32 bits. def LF32 : SDNodeXForm<imm, [{ uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // High 32 bits. def HF32 : SDNodeXForm<imm, [{ uint64_t Value = N->getZExtValue() >> 32; - return CurDAG->getTargetConstant(Value, MVT::i64); + return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64); }]>; // Truncate an immediate to a 8-bit signed quantity. def SIMM8 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 8-bit unsigned quantity. 
def UIMM8 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit. +def UIMM8EVEN : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, SDLoc(N), + MVT::i64); +}]>; + +// Truncate an immediate to a 12-bit unsigned quantity. +def UIMM12 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 16-bit signed quantity. def SIMM16 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 16-bit unsigned quantity. def UIMM16 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 32-bit signed quantity. def SIMM32 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Truncate an immediate to a 32-bit unsigned quantity. def UIMM32 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; // Negate and then truncate an immediate to a 32-bit unsigned quantity. def NEGIMM32 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64); + return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N), + MVT::i64); }]>; //===----------------------------------------------------------------------===// // Immediate asm operands. 
//===----------------------------------------------------------------------===// +def U1Imm : ImmediateAsmOperand<"U1Imm">; +def U2Imm : ImmediateAsmOperand<"U2Imm">; +def U3Imm : ImmediateAsmOperand<"U3Imm">; def U4Imm : ImmediateAsmOperand<"U4Imm">; def U6Imm : ImmediateAsmOperand<"U6Imm">; def S8Imm : ImmediateAsmOperand<"S8Imm">; def U8Imm : ImmediateAsmOperand<"U8Imm">; +def U12Imm : ImmediateAsmOperand<"U12Imm">; def S16Imm : ImmediateAsmOperand<"S16Imm">; def U16Imm : ImmediateAsmOperand<"U16Imm">; def S32Imm : ImmediateAsmOperand<"S32Imm">; @@ -226,10 +278,28 @@ def imm32lh16c : Immediate<i32, [{ }], LH16, "U16Imm">; // Short immediates +def imm32zx1 : Immediate<i32, [{ + return isUInt<1>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U1Imm">; + +def imm32zx2 : Immediate<i32, [{ + return isUInt<2>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U2Imm">; + +def imm32zx3 : Immediate<i32, [{ + return isUInt<3>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U3Imm">; + def imm32zx4 : Immediate<i32, [{ return isUInt<4>(N->getZExtValue()); }], NOOP_SDNodeXForm, "U4Imm">; +// Note: this enforces an even value during code generation only. +// When used from the assembler, any 4-bit value is allowed. +def imm32zx4even : Immediate<i32, [{ + return isUInt<4>(N->getZExtValue()); +}], UIMM8EVEN, "U4Imm">; + def imm32zx6 : Immediate<i32, [{ return isUInt<6>(N->getZExtValue()); }], NOOP_SDNodeXForm, "U6Imm">; @@ -244,6 +314,10 @@ def imm32zx8 : Immediate<i32, [{ def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">; +def imm32zx12 : Immediate<i32, [{ + return isUInt<12>(N->getZExtValue()); +}], UIMM12, "U12Imm">; + def imm32sx16 : Immediate<i32, [{ return isInt<16>(N->getSExtValue()); }], SIMM16, "S16Imm">; @@ -370,6 +444,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; // PC-relative asm operands. 
def PCRel16 : PCRelAsmOperand<"16">; def PCRel32 : PCRelAsmOperand<"32">; +def PCRelTLS16 : PCRelTLSAsmOperand<"16">; +def PCRelTLS32 : PCRelTLSAsmOperand<"32">; // PC-relative offsets of a basic block. The offset is sign-extended // and multiplied by 2. @@ -382,6 +458,20 @@ def brtarget32 : PCRelOperand<OtherVT, PCRel32> { let DecoderMethod = "decodePC32DBLOperand"; } +// Variants of brtarget16/32 with an optional additional TLS symbol. +// These are used to annotate calls to __tls_get_offset. +def tlssym : Operand<i64> { } +def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> { + let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); + let EncoderMethod = "getPC16DBLTLSEncoding"; + let DecoderMethod = "decodePC16DBLOperand"; +} +def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> { + let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); + let EncoderMethod = "getPC32DBLTLSEncoding"; + let DecoderMethod = "decodePC32DBLOperand"; +} + // A PC-relative offset of a global value. The offset is sign-extended // and multiplied by 2. def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> { @@ -408,6 +498,7 @@ def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; +def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; // DAG patterns and operands for addressing modes. 
Each mode has // the form <type><range><group>[<len>] where: @@ -420,6 +511,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; // laaddr : like bdxaddr, but used for Load Address operations // dynalloc : base + displacement + index + ADJDYNALLOC // bdladdr : base + displacement with a length field +// bdvaddr : base + displacement with a vector index // // <range> is one of: // 12 : the displacement is an unsigned 12-bit value @@ -452,6 +544,7 @@ def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; +def bdvaddr12only : BDVMode< "64", "12">; //===----------------------------------------------------------------------===// // Miscellaneous diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index c70e662db4270..3c95a1e11b45a 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_ZTBegin : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>]>; +def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<3, i32>]>; +def SDT_ZExtractVectorElt : SDTypeProfile<1, 2, + [SDTCisVec<1>, + SDTCisVT<2, i32>]>; +def SDT_ZReplicate : SDTypeProfile<1, 1, + [SDTCisVec<0>]>; +def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisVec<1>]>; +def SDT_ZVecUnary : SDTypeProfile<1, 1, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>]>; +def SDT_ZVecBinary : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDT_ZVecBinaryConv : 
SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisSameAs<1, 2>]>; +def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVec<1>, + SDTCisVT<2, i32>]>; +def SDT_ZRotateMask : SDTypeProfile<1, 2, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_ZJoinDwords : SDTypeProfile<1, 2, + [SDTCisVT<0, v2i64>, + SDTCisVT<1, i64>, + SDTCisVT<2, i64>]>; +def SDT_ZVecTernary : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; +def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, + [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -90,6 +148,7 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; +def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; // Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, @@ -100,6 +159,12 @@ def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; @@ -114,6 +179,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; @@ -123,6 +189,80 @@ def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, [SDNPHasChain, SDNPMayStore]>; +// Defined because the index is an i32 rather than a pointer. 
+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", + SDT_ZInsertVectorElt>; +def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDT_ZExtractVectorElt>; +def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; +def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; +def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; +def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; +def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; +def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; +def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; +def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; +def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", + SDT_ZVecTernaryInt>; +def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; +def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; +def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; +def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; +def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; +def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>; +def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", + SDT_ZVecBinaryInt>; +def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; +def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; +def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; +def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; +def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary, + [SDNPOutGlue]>; +def 
z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; +def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; +def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; +def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, + [SDNPOutGlue]>; +def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; +def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; +def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, + [SDNPOutGlue]>; +def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, + [SDNPOutGlue]>; +def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, + [SDNPOutGlue]>; +def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, + [SDNPOutGlue]>; +def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", + SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; +def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, + [SDNPOutGlue]>; + class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> : SDNode<"SystemZISD::"##name, profile, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -172,6 +312,19 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def 
z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDTNone, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + +def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; +def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; +def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// @@ -195,11 +348,21 @@ def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; +// Match extensions of an i32 to an i64, followed by an in-register sign +// extension from a sub-i32 value. +def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>; +def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>; + // Register zero-extend operations. Sub-32-bit values are represented as i32s. def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; +// Match extensions of an i32 to an i64, followed by an AND of the low +// i8 or i16 part. +def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>; +def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>; + // Typed floating-point loads. 
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; @@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src), def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; +// Integer multiply-and-add +def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (add (mul node:$src1, node:$src2), node:$src3)>; + +// Fused multiply-subtract, using the natural operand order. +def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src1, node:$src2, (fneg node:$src3))>; + // Fused multiply-add and multiply-subtract, but with the order of the // operands matching SystemZ's MA and MS instructions. def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -383,3 +554,110 @@ class loadu<SDPatternOperator operator, SDPatternOperator load = load> class storeu<SDPatternOperator operator, SDPatternOperator store = store> : PatFrag<(ops node:$value, node:$addr), (store (operator node:$value), node:$addr)>; + +// Vector representation of all-zeros and all-ones. +def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; +def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; + +// Load a scalar and replicate it in all elements of a vector. +class z_replicate_load<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$addr), + (z_replicate (scalartype (load node:$addr)))>; +def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>; +def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>; +def z_replicate_loadi32 : z_replicate_load<i32, load>; +def z_replicate_loadi64 : z_replicate_load<i64, load>; +def z_replicate_loadf32 : z_replicate_load<f32, load>; +def z_replicate_loadf64 : z_replicate_load<f64, load>; + +// Load a scalar and insert it into a single element of a vector. 
+class z_vle<ValueType scalartype, SDPatternOperator load> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (z_vector_insert node:$vec, (scalartype (load node:$addr)), + node:$index)>; +def z_vlei8 : z_vle<i32, anyextloadi8>; +def z_vlei16 : z_vle<i32, anyextloadi16>; +def z_vlei32 : z_vle<i32, load>; +def z_vlei64 : z_vle<i64, load>; +def z_vlef32 : z_vle<f32, load>; +def z_vlef64 : z_vle<f64, load>; + +// Load a scalar and insert it into the low element of the high i64 of a +// zeroed vector. +class z_vllez<ValueType scalartype, SDPatternOperator load, int index> + : PatFrag<(ops node:$addr), + (z_vector_insert (z_vzero), + (scalartype (load node:$addr)), (i32 index))>; +def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>; +def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>; +def z_vllezi32 : z_vllez<i32, load, 1>; +def z_vllezi64 : PatFrag<(ops node:$addr), + (z_join_dwords (i64 (load node:$addr)), (i64 0))>; +// We use high merges to form a v4f32 from four f32s. Propagating zero +// into all elements but index 1 gives this expression. +def z_vllezf32 : PatFrag<(ops node:$addr), + (bitconvert + (z_merge_high + (v2i64 + (z_unpackl_high + (v4i32 + (bitconvert + (v4f32 (scalar_to_vector + (f32 (load node:$addr)))))))), + (v2i64 (z_vzero))))>; +def z_vllezf64 : PatFrag<(ops node:$addr), + (z_merge_high + (scalar_to_vector (f64 (load node:$addr))), + (z_vzero))>; + +// Store one element of a vector. +class z_vste<ValueType scalartype, SDPatternOperator store> + : PatFrag<(ops node:$vec, node:$addr, node:$index), + (store (scalartype (z_vector_extract node:$vec, node:$index)), + node:$addr)>; +def z_vstei8 : z_vste<i32, truncstorei8>; +def z_vstei16 : z_vste<i32, truncstorei16>; +def z_vstei32 : z_vste<i32, store>; +def z_vstei64 : z_vste<i64, store>; +def z_vstef32 : z_vste<f32, store>; +def z_vstef64 : z_vste<f64, store>; + +// Arithmetic negation on vectors. +def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; + +// Bitwise negation on vectors. 
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; + +// Signed "integer greater than zero" on vectors. +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; + +// Signed "integer less than zero" on vectors. +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; + +// Integer absolute on vectors. +class z_viabs<int shift> + : PatFrag<(ops node:$src), + (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))), + (z_vsra_by_scalar node:$src, (i32 shift)))>; +def z_viabs8 : z_viabs<7>; +def z_viabs16 : z_viabs<15>; +def z_viabs32 : z_viabs<31>; +def z_viabs64 : z_viabs<63>; + +// Sign-extend the i64 elements of a vector. +class z_vse<int shift> + : PatFrag<(ops node:$src), + (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>; +def z_vsei8 : z_vse<56>; +def z_vsei16 : z_vse<48>; +def z_vsei32 : z_vse<32>; + +// ...and again with the extensions being done on individual i64 scalars. +class z_vse_by_parts<SDPatternOperator operator, int index1, int index2> + : PatFrag<(ops node:$src), + (z_join_dwords + (operator (z_vector_extract node:$src, index1)), + (operator (z_vector_extract node:$src, index2)))>; +def z_vsei8_by_parts : z_vse_by_parts<sext8dbl, 7, 15>; +def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>; +def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>; diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index e307f8a888eed..16a7ed784d709 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> { // The sign of the zero makes no difference. def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; } + +// Use INSN for performing binary operation OPERATION of type VT +// on registers of class CLS. 
+class BinaryRRWithType<Instruction insn, RegisterOperand cls, + SDPatternOperator operator, ValueType vt> + : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; + +// Use INSN to perform conversion operation OPERATOR, with the input being +// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions +// and 0 to allow them. MODE is the rounding mode to use. +class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1, + TypedReg tr2, bits<3> suppress, bits<4> mode> + : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), + (insn tr2.op:$vec, suppress, mode)>; diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index e6b58f17b0e68..32fbe5ae9ef91 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// class SystemZFeature<string extname, string intname, string desc> - : Predicate<"Subtarget.has"##intname##"()">, + : Predicate<"Subtarget->has"##intname##"()">, AssemblerPredicate<"Feature"##intname, extname>, SubtargetFeature<extname, "Has"##intname, "true", desc>; class SystemZMissingFeature<string intname> - : Predicate<"!Subtarget.has"##intname##"()">; + : Predicate<"!Subtarget->has"##intname##"()">; def FeatureDistinctOps : SystemZFeature< "distinct-ops", "DistinctOps", @@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature< "Assume that the floating-point extension facility is installed" >; +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", + "Assume that the population-count facility is installed" +>; + def FeatureFastSerialization : SystemZFeature< "fast-serialization", "FastSerialization", "Assume that the fast-serialization facility is installed" @@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZFeature< >; def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; +def 
FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", + "Assume that the miscellaneous-extensions facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", + "Assume that the transactional-execution facility is installed" +>; + +def FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", + "Assume that the processor-assist facility is installed" +>; + +def FeatureVector : SystemZFeature< + "vector", "Vector", + "Assume that the vector facility is installed" +>; +def FeatureNoVector : SystemZMissingFeature<"Vector">; + def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1]>; def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, + FeatureTransactionalExecution, FeatureProcessorAssist]>; +def : Processor<"z13", NoItineraries, + [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureTransactionalExecution, FeatureProcessorAssist, + FeatureVector]>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 64f5eebf37c14..7cabea962e911 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -28,7 +28,8 @@ 
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const uint32_t * -SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { +SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { return CSR_SystemZ_RegMask; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 212fe91f38abb..a0db5a9c188fb 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -43,9 +43,9 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } - const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const - override; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 47ac20dae78ab..85aa0a62cc767 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -25,20 +25,24 @@ def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32. def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32. def subreg_l64 : SubRegIndex<64, 0>; def subreg_h64 : SubRegIndex<64, 64>; +def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits. +def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits. 
def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>; def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>; +def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>; } -// Define a register class that contains values of type TYPE and an +// Define a register class that contains values of types TYPES and an // associated operand called NAME. SIZE is the size and alignment // of the registers and REGLIST is the list of individual registers. -multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> { +multiclass SystemZRegClass<string name, list<ValueType> types, int size, + dag regList> { def AsmOperand : AsmOperandClass { let Name = name; let ParserMethod = "parse"##name; let RenderMethod = "addRegOperands"; } - def Bit : RegisterClass<"SystemZ", [type], size, regList> { + def Bit : RegisterClass<"SystemZ", types, size, regList> { let Size = size; } def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { @@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { /// Allocate the callee-saved R6-R13 backwards. That way they can be saved /// together with R14 and R15 in one prolog instruction. -defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5), - (sequence "R%uL", 15, 6))>; -defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5), - (sequence "R%uH", 15, 6))>; -defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), - (sequence "R%uD", 15, 6))>; +defm GR32 : SystemZRegClass<"GR32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uL", 15, 6))>; +defm GRH32 : SystemZRegClass<"GRH32", [i32], 32, + (add (sequence "R%uH", 0, 5), + (sequence "R%uH", 15, 6))>; +defm GR64 : SystemZRegClass<"GR64", [i64], 64, + (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))>; // Combine the low and high GR32s into a single class. This can only be // used for virtual registers if the high-word facility is available. 
-defm GRX32 : SystemZRegClass<"GRX32", i32, 32, +defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, (add (sequence "R%uL", 0, 5), (sequence "R%uH", 0, 5), R15L, R15H, R14L, R14H, R13L, R13H, @@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i32, 32, // The architecture doesn't really have any i128 support, so model the // register pairs as untyped instead. -defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, - R12Q, R10Q, R8Q, R6Q, - R14Q)>; +defm GR128 : SystemZRegClass<"GR128", [untyped], 128, + (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>; // Base and index registers. Everything except R0, which in an address // context evaluates as 0. -defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>; -defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; +defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>; +defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>; // Not used directly, but needs to exist for ADDR32 and ADDR64 subregs // of a GR128. 
-defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; +defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>; //===----------------------------------------------------------------------===// // Floating-point registers @@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>; def F13Dwarf : DwarfMapping<30>; def F15Dwarf : DwarfMapping<31>; -// Lower 32 bits of one of the 16 64-bit floating-point registers +def F16Dwarf : DwarfMapping<68>; +def F18Dwarf : DwarfMapping<69>; +def F20Dwarf : DwarfMapping<70>; +def F22Dwarf : DwarfMapping<71>; + +def F17Dwarf : DwarfMapping<72>; +def F19Dwarf : DwarfMapping<73>; +def F21Dwarf : DwarfMapping<74>; +def F23Dwarf : DwarfMapping<75>; + +def F24Dwarf : DwarfMapping<76>; +def F26Dwarf : DwarfMapping<77>; +def F28Dwarf : DwarfMapping<78>; +def F30Dwarf : DwarfMapping<79>; + +def F25Dwarf : DwarfMapping<80>; +def F27Dwarf : DwarfMapping<81>; +def F29Dwarf : DwarfMapping<82>; +def F31Dwarf : DwarfMapping<83>; + +// Upper 32 bits of one of the floating-point registers class FPR32<bits<16> num, string n> : SystemZReg<n> { let HWEncoding = num; } -// One of the 16 64-bit floating-point registers -class FPR64<bits<16> num, string n, FPR32 low> - : SystemZRegWithSubregs<n, [low]> { +// One of the floating-point registers. +class FPR64<bits<16> num, string n, FPR32 high> + : SystemZRegWithSubregs<n, [high]> { let HWEncoding = num; - let SubRegIndices = [subreg_h32]; + let SubRegIndices = [subreg_r32]; } // 8 pairs of FPR64s, with a one-register gap inbetween. @@ -161,12 +187,17 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> let SubRegIndices = [subreg_l64, subreg_h64]; } -// Floating-point registers +// Floating-point registers. Registers 16-31 require the vector facility. 
foreach I = 0-15 in { def F#I#S : FPR32<I, "f"#I>; def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; } +foreach I = 16-31 in { + def F#I#S : FPR32<I, "v"#I>; + def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"), @@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { // There's no store-multiple instruction for FPRs, so we're not fussy // about the order in which call-saved registers are allocated. -defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>; -defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>; -defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, - F8Q, F9Q, F12Q, F13Q)>; +defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; +defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; +defm FP128 : SystemZRegClass<"FP128", [f128], 128, + (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>; + +//===----------------------------------------------------------------------===// +// Vector registers +//===----------------------------------------------------------------------===// + +// A full 128-bit vector register, with an FPR64 as its high part. +class VR128<bits<16> num, string n, FPR64 high> + : SystemZRegWithSubregs<n, [high]> { + let HWEncoding = num; + let SubRegIndices = [subreg_r64]; +} + +// Full vector registers. +foreach I = 0-31 in { + def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>, + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; +} + +// Class used to store 32-bit values in the first element of a vector +// register. f32 scalars are used for the WLEDB and WLDEB instructions. 
+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, + (add (sequence "F%uS", 0, 7), + (sequence "F%uS", 16, 31), + (sequence "F%uS", 8, 15))>; + +// Class used to store 64-bit values in the upper half of a vector register. +// The vector facility also includes scalar f64 instructions that operate +// on the full vector register set. +defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64, + (add (sequence "F%uD", 0, 7), + (sequence "F%uD", 16, 31), + (sequence "F%uD", 8, 15))>; + +// The subset of vector registers that can be used for floating-point +// operations too. +defm VF128 : SystemZRegClass<"VF128", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + (sequence "V%u", 0, 15)>; + +// All vector registers. +defm VR128 : SystemZRegClass<"VR128", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + (add (sequence "V%u", 0, 7), + (sequence "V%u", 16, 31), + (sequence "V%u", 8, 15))>; + +// Attaches a ValueType to a register operand, to make the instruction +// definitions easier. 
+class TypedReg<ValueType vtin, RegisterOperand opin> { + ValueType vt = vtin; + RegisterOperand op = opin; +} + +def v32eb : TypedReg<f32, VR32>; +def v64g : TypedReg<i64, VR64>; +def v64db : TypedReg<f64, VR64>; +def v128b : TypedReg<v16i8, VR128>; +def v128h : TypedReg<v8i16, VR128>; +def v128f : TypedReg<v4i32, VR128>; +def v128g : TypedReg<v2i64, VR128>; +def v128q : TypedReg<v16i8, VR128>; +def v128eb : TypedReg<v4f32, VR128>; +def v128db : TypedReg<v2f64, VR128>; +def v128any : TypedReg<untyped, VR128>; //===----------------------------------------------------------------------===// // Other registers diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index a3cba64b9ed20..e7e0268dbb8a1 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -46,10 +46,10 @@ static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence, // number of straight-line MVCs as 6 * 256 - 1. if (Size > 6 * 256) return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, - DAG.getConstant(Size, PtrVT), - DAG.getConstant(Size / 256, PtrVT)); + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, - DAG.getConstant(Size, PtrVT)); + DAG.getConstant(Size, DL, PtrVT)); } SDValue SystemZSelectionDAGInfo:: @@ -78,7 +78,8 @@ static SDValue memsetStore(SelectionDAG &DAG, SDLoc DL, SDValue Chain, for (unsigned I = 1; I < Size; ++I) StoreVal |= ByteVal << (I * 8); return DAG.getStore(Chain, DL, - DAG.getConstant(StoreVal, MVT::getIntegerVT(Size * 8)), + DAG.getConstant(StoreVal, DL, + MVT::getIntegerVT(Size * 8)), Dst, DstPtrInfo, false, false, Align); } @@ -103,7 +104,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // we can move at most 2 halfwords. uint64_t ByteVal = CByte->getZExtValue(); if (ByteVal == 0 || ByteVal == 255 ? 
- Bytes <= 16 && CountPopulation_64(Bytes) <= 2 : + Bytes <= 16 && countPopulation(Bytes) <= 2 : Bytes <= 4) { unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); unsigned Size2 = Bytes - Size1; @@ -112,7 +113,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, if (Size2 == 0) return Chain1; Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, - DAG.getConstant(Size1, PtrVT)); + DAG.getConstant(Size1, DL, PtrVT)); DstPtrInfo = DstPtrInfo.getWithOffset(Size1); SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, std::min(Align, Size1), DstPtrInfo); @@ -126,7 +127,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, if (Bytes == 1) return Chain1; SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, - DAG.getConstant(1, PtrVT)); + DAG.getConstant(1, DL, PtrVT)); SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, DstPtrInfo.getWithOffset(1), false, false, 1); @@ -146,7 +147,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, false, false, Align); SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, - DAG.getConstant(1, PtrVT)); + DAG.getConstant(1, DL, PtrVT)); return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, Chain, DstPlus1, Dst, Bytes - 1); } @@ -169,10 +170,10 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // needs 2 branches, whereas a straight-line sequence would need 3 or more. 
if (Size > 3 * 256) return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, - DAG.getConstant(Size, PtrVT), - DAG.getConstant(Size / 256, PtrVT)); + DAG.getConstant(Size, DL, PtrVT), + DAG.getConstant(Size / 256, DL, PtrVT)); return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, - DAG.getConstant(Size, PtrVT)); + DAG.getConstant(Size, DL, PtrVT)); } // Convert the current CC value into an integer that is 0 if CC == 0, @@ -182,9 +183,9 @@ static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) { SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); + DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, MVT::i32)); + DAG.getConstant(31, DL, MVT::i32)); return ROTL; } @@ -213,7 +214,7 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, - DAG.getConstant(255, MVT::i32)); + DAG.getConstant(255, DL, MVT::i32)); SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, Limit, Src, Char); @@ -222,12 +223,10 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, // Now select between End and null, depending on whether the character // was found. 
- SmallVector<SDValue, 5> Ops; - Ops.push_back(End); - Ops.push_back(DAG.getConstant(0, PtrVT)); - Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32)); - Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); - Ops.push_back(Glue); + SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), + DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), + Glue}; VTs = DAG.getVTList(PtrVT, MVT::Glue); End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); return std::make_pair(End, Chain); @@ -240,7 +239,7 @@ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, MachinePointerInfo SrcPtrInfo, bool isStpcpy) const { SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); } @@ -251,7 +250,7 @@ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, MachinePointerInfo Op2PtrInfo) const { SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); Chain = Unused.getValue(1); SDValue Glue = Chain.getValue(2); return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); @@ -268,7 +267,7 @@ static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL, EVT PtrVT = Src.getValueType(); SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, - Limit, Src, DAG.getConstant(0, MVT::i32)); + Limit, Src, DAG.getConstant(0, DL, MVT::i32)); Chain = End.getValue(1); SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); return std::make_pair(Len, Chain); @@ -278,7 +277,7 @@ std::pair<SDValue, SDValue> 
SystemZSelectionDAGInfo:: EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src, MachinePointerInfo SrcPtrInfo) const { EVT PtrVT = Src.getValueType(); - return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT)); + return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); } std::pair<SDValue, SDValue> SystemZSelectionDAGInfo:: diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index ec7a8c40d18a9..d1a17c5500d65 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -15,6 +15,7 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -36,6 +37,10 @@ public: private: bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, unsigned LLIxL, unsigned LLIxH); + bool shortenOn0(MachineInstr &MI, unsigned Opcode); + bool shortenOn01(MachineInstr &MI, unsigned Opcode); + bool shortenOn001(MachineInstr &MI, unsigned Opcode); + bool shortenFPConv(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; @@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap, return false; } +// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding. +bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operands 0 and 1 have a +// 4-bit encoding. 
+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + return true; + } + return false; +} + +// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a +// 4-bit encoding and if operands 0 and 1 are tied. +bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + MI.getOperand(1).getReg() == MI.getOperand(0).getReg() && + SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) { + MI.setDesc(TII->get(Opcode)); + return true; + } + return false; +} + +// MI is a vector-style conversion instruction with the operand order: +// destination, source, exact-suppress, rounding-mode. If both registers +// have a 4-bit encoding then change it to Opcode, which has operand order: +// destination, rounding-mode, source, exact-suppress. +bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { + if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && + SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { + MachineOperand Dest(MI.getOperand(0)); + MachineOperand Src(MI.getOperand(1)); + MachineOperand Suppress(MI.getOperand(2)); + MachineOperand Mode(MI.getOperand(3)); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.RemoveOperand(1); + MI.RemoveOperand(0); + MI.setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .addOperand(Dest) + .addOperand(Mode) + .addOperand(Src) + .addOperand(Suppress); + return true; + } + return false; +} + // Process all instructions in MBB. Return true if something changed. bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { // Iterate backwards through the block looking for instructions to change. 
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; - unsigned Opcode = MI.getOpcode(); - if (Opcode == SystemZ::IILF) + switch (MI.getOpcode()) { + case SystemZ::IILF: Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, SystemZ::LLILH); - else if (Opcode == SystemZ::IIHF) + break; + + case SystemZ::IIHF: Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, SystemZ::LLIHH); + break; + + case SystemZ::WFADB: + Changed |= shortenOn001(MI, SystemZ::ADBR); + break; + + case SystemZ::WFDDB: + Changed |= shortenOn001(MI, SystemZ::DDBR); + break; + + case SystemZ::WFIDB: + Changed |= shortenFPConv(MI, SystemZ::FIDBRA); + break; + + case SystemZ::WLDEB: + Changed |= shortenOn01(MI, SystemZ::LDEBR); + break; + + case SystemZ::WLEDB: + Changed |= shortenFPConv(MI, SystemZ::LEDBRA); + break; + + case SystemZ::WFMDB: + Changed |= shortenOn001(MI, SystemZ::MDBR); + break; + + case SystemZ::WFLCDB: + Changed |= shortenOn01(MI, SystemZ::LCDBR); + break; + + case SystemZ::WFLNDB: + Changed |= shortenOn01(MI, SystemZ::LNDBR); + break; + + case SystemZ::WFLPDB: + Changed |= shortenOn01(MI, SystemZ::LPDBR); + break; + + case SystemZ::WFSQDB: + Changed |= shortenOn01(MI, SystemZ::SQDBR); + break; + + case SystemZ::WFSDB: + Changed |= shortenOn001(MI, SystemZ::SDBR); + break; + + case SystemZ::WFCDB: + Changed |= shortenOn01(MI, SystemZ::CDBR); + break; + + case SystemZ::VL32: + // For z13 we prefer LDE over LE to avoid partial register dependencies. 
+ Changed |= shortenOn0(MI, SystemZ::LDE32); + break; + + case SystemZ::VST32: + Changed |= shortenOn0(MI, SystemZ::STE); + break; + + case SystemZ::VL64: + Changed |= shortenOn0(MI, SystemZ::LD); + break; + + case SystemZ::VST64: + Changed |= shortenOn0(MI, SystemZ::STD); + break; + } + unsigned UsedLow = 0; unsigned UsedHigh = 0; for (auto MOI = MI.operands_begin(), MOE = MI.operands_end(); diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index e160bc86f2251..05aede3deb4f3 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -10,7 +10,6 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Host.h" using namespace llvm; @@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; -#if defined(__linux__) && defined(__s390x__) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Parse features string. ParseSubtargetFeatures(CPUName, FS); return *this; @@ -43,14 +38,12 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasFastSerialization(false), HasInterlockedAccess1(false), - TargetTriple(TT), - // Make sure that global data has at least 16 bits of alignment by - // default, so that we can refer to it using LARL. We don't have any - // special requirements for stack variables though. 
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), - TSInfo(DL), FrameLowering() {} + HasPopulationCount(false), HasFastSerialization(false), + HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasTransactionalExecution(false), HasProcessorAssist(false), + HasVector(false), + TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {} // Return true if GV binds locally under reloc model RM. static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index f8815524e0f32..9a1f593f52657 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -38,12 +38,16 @@ protected: bool HasLoadStoreOnCond; bool HasHighWord; bool HasFPExtension; + bool HasPopulationCount; bool HasFastSerialization; bool HasInterlockedAccess1; + bool HasMiscellaneousExtensions; + bool HasTransactionalExecution; + bool HasProcessorAssist; + bool HasVector; private: Triple TargetTriple; - const DataLayout DL; SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; SystemZSelectionDAGInfo TSInfo; @@ -59,7 +63,6 @@ public: return &FrameLowering; } const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL; } const SystemZRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } @@ -88,12 +91,29 @@ public: // Return true if the target has the floating-point extension facility. bool hasFPExtension() const { return HasFPExtension; } + // Return true if the target has the population-count facility. + bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the fast-serialization facility. 
bool hasFastSerialization() const { return HasFastSerialization; } // Return true if the target has interlocked-access facility 1. bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + // Return true if the target has the miscellaneous-extensions facility. + bool hasMiscellaneousExtensions() const { + return HasMiscellaneousExtensions; + } + + // Return true if the target has the transactional-execution facility. + bool hasTransactionalExecution() const { return HasTransactionalExecution; } + + // Return true if the target has the processor-assist facility. + bool hasProcessorAssist() const { return HasProcessorAssist; } + + // Return true if the target has the vector facility. + bool hasVector() const { return HasVector; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index a210074484e7f..a34cdaf8030d2 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -20,12 +21,71 @@ extern "C" void LLVMInitializeSystemZTarget() { RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); } +// Determine whether we use the vector ABI. +static bool UsesVectorABI(StringRef CPU, StringRef FS) { + // We use the vector ABI whenever the vector facility is available. + // This is the case by default if CPU is z13 or later, and can be + // overridden via "[+-]vector" feature string elements. 
+ bool VectorABI = true; + if (CPU.empty() || CPU == "generic" || + CPU == "z10" || CPU == "z196" || CPU == "zEC12") + VectorABI = false; + + SmallVector<StringRef, 3> Features; + FS.split(Features, ",", -1, false /* KeepEmpty */); + for (auto &Feature : Features) { + if (Feature == "vector" || Feature == "+vector") + VectorABI = true; + if (Feature == "-vector") + VectorABI = false; + } + + return VectorABI; +} + +static std::string computeDataLayout(StringRef TT, StringRef CPU, + StringRef FS) { + const Triple Triple(TT); + bool VectorABI = UsesVectorABI(CPU, FS); + std::string Ret = ""; + + // Big endian. + Ret += "E"; + + // Data mangling. + Ret += DataLayout::getManglingComponent(Triple); + + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + Ret += "-i1:8:16-i8:8:16"; + + // 64-bit integers are naturally aligned. + Ret += "-i64:64"; + + // 128-bit floats are aligned only to 64 bits. + Ret += "-f128:64"; + + // When using the vector ABI, 128-bit vectors are also aligned to 64 bits. + if (VectorABI) + Ret += "-v128:64"; + + // We prefer 16 bits of alignment for all globals; see above. + Ret += "-a:8:16"; + + // Integer registers are 32 or 64 bits. 
+ Ret += "-n32:64"; + + return Ret; +} + SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), + TT, CPU, FS, Options, RM, CM, OL), TLOF(make_unique<TargetLoweringObjectFileELF>()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); @@ -57,6 +117,10 @@ void SystemZPassConfig::addIRPasses() { bool SystemZPassConfig::addInstSelector() { addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); + return false; } @@ -100,3 +164,9 @@ void SystemZPassConfig::addPreEmitPass() { TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(this, PM); } + +TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); + }); +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 9fae5e43e754d..5ded07c1efb21 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -33,12 +33,13 @@ public: CodeGenOpt::Level OL); ~SystemZTargetMachine() override; - // Override TargetMachine. 
- const SystemZSubtarget *getSubtargetImpl() const override { + const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } + const SystemZSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; } // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 0000000000000..5a87df1976c34 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,258 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. 
+// +//===----------------------------------------------------------------------===// + +unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + return 4 * TTI::TCC_Basic; +} + +unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; + case Instruction::Store: + if (Idx == 0 && Imm.getBitWidth() <= 64) { + // Any 8-bit immediate store can be implemented via mvi. 
+ if (BitSize == 8) + return TTI::TCC_Free; + // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. + if (isInt<16>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::ICmp: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Comparisons against signed 32-bit immediates implemented via cgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + // Comparisons against unsigned 32-bit immediates implemented via clgfi. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Add: + case Instruction::Sub: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Or their negation, by swapping addition vs. subtraction. + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Mul: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use msgfi to multiply by 32-bit signed immediates. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Or: + case Instruction::Xor: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Masks supported by oilf/xilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Masks supported by oihf/xihf. + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Free; + } + break; + case Instruction::And: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Any 32-bit AND operation can by implemented via nilf. + if (BitSize <= 32) + return TTI::TCC_Free; + // 64-bit masks supported by nilf. + if (isUInt<32>(~Imm.getZExtValue())) + return TTI::TCC_Free; + // 64-bit masks supported by nilh. + if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) + return TTI::TCC_Free; + // Some 64-bit AND operations can be implemented via risbg. 
+ const SystemZInstrInfo *TII = ST->getInstrInfo(); + unsigned Start, End; + if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) + return TTI::TCC_Free; + } + break; + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + // Always return TCC_Free for the shift value of a shift instruction. + if (Idx == 1) + return TTI::TCC_Free; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. 
+ if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + +unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { + if (!Vector) + // Discount the stack pointer. Also leave out %r0, since it can't + // be used in an address. + return 14; + if (ST->hasVector()) + return 32; + return 0; +} + +unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { + if (!Vector) + return 64; + if (ST->hasVector()) + return 128; + return 0; +} + diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 0000000000000..e9cabe968eea2 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,78 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + SystemZTTIImpl(const SystemZTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + SystemZTTIImpl(SystemZTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool 
Vector); + unsigned getRegisterBitWidth(bool Vector); + + /// @} +}; + +} // end namespace llvm + +#endif |
